<?php

namespace App\Services\Youtube;

use Exception;

/**
 * Fetches caption tracks referenced by a video page and turns the caption XML
 * into plain text.
 *
 * The page is downloaded with cURL, the `"captionTracks":[...]` JSON fragment is
 * pulled out with a regular expression, and the selected track's `baseUrl` is
 * downloaded in a second request.
 */
class YoutubeTranscriptService
{
    /**
     * Fetch the transcript for a video page, preferring a given language.
     *
     * Return shapes (kept as-is for backward compatibility):
     *  - plain array `['error' => ..., 'status_code' => 403|500]` when the page
     *    answers 403 or when an Exception is caught;
     *  - the result of `response()->json([...])` with `captions` (cleaned text) and
     *    `status_code` 200 when a track whose language code contains $defaultLang
     *    is found, or with `error` and `status_code` 404 when no track is available;
     *  - the raw caption response body (string) when no track matches the language
     *    and the first track is used as a fallback.
     *
     * NOTE(review): the fallback branch returns raw, uncleaned caption data while
     * the language-match branch returns a JSON response with cleaned text. This
     * looks unintentional, but it is preserved because callers may depend on it.
     *
     * @param string $videoUrl    URL of the page to download.
     * @param string $defaultLang Substring looked for in each track's `languageCode`.
     *
     * @return mixed See the list of return shapes above.
     */
    public function getTranscript($videoUrl, $defaultLang = 'en')
    {
        try {
            $page = $this->fetchUrl($videoUrl);

            // Check for IP block (e.g., status code 403)
            if ($page['status'] === 403) {
                return ['error' => 'IP blocked', 'status_code' => 403];
            }

            $captionTracks = $this->extractCaptionTracks($page['body']);
            if ($captionTracks === null) {
                return response()->json(['error' => __('No captionTracks found in the response.'), 'status_code' => 404]);
            }

            foreach ($captionTracks as $track) {
                // Skip malformed entries instead of reading undefined keys.
                if (!is_array($track) || !isset($track['languageCode'], $track['baseUrl'])) {
                    continue;
                }

                if (strpos((string) $track['languageCode'], $defaultLang) !== false) {
                    $captions = $this->fetchCaptions($track['baseUrl']);

                    return response()->json([
                        'captions'    => $this->cleanYouTubeTranscript($captions)['text'],
                        'status_code' => 200,
                    ]);
                }
            }

            // If the default language is not found, return the first available caption
            if (isset($captionTracks[0]['baseUrl'])) {
                return $this->fetchCaptions($captionTracks[0]['baseUrl']);
            }

            return response()->json(['error' => __('No caption tracks found.'), 'status_code' => 404]);
        } catch (Exception $e) {
            return ['error' => $e->getMessage(), 'status_code' => 500];
        }
    }

    /**
     * Convert caption XML (a root element containing `<text start dur>` children)
     * into a list of cues and a single space-joined string.
     *
     * Input that is empty, not a string, or not well-formed XML yields an empty
     * result instead of raising warnings.
     *
     * @param string $xmlString Caption XML document.
     *
     * @return array `['array' => list of ['start', 'duration', 'text'], 'text' => string]`
     */
    public function cleanYouTubeTranscript($xmlString)
    {
        $transcriptArray = [];
        $textParts = [];

        $xml = false;
        if (is_string($xmlString) && trim($xmlString) !== '') {
            // Collect libxml parse errors internally so malformed XML does not emit
            // PHP warnings; the previous setting is restored afterwards.
            $previousSetting = libxml_use_internal_errors(true);
            try {
                $xml = simplexml_load_string($xmlString, 'SimpleXMLElement', LIBXML_NOCDATA);
            } finally {
                libxml_clear_errors();
                libxml_use_internal_errors($previousSetting);
            }
        }

        if ($xml === false) {
            return [
                'array' => $transcriptArray,
                'text'  => '',
            ];
        }

        // Loop through each <text> element in the XML
        foreach ($xml->text as $textElement) {
            $start = (string) $textElement['start'];
            $dur = (string) $textElement['dur'];

            // Decode entities left over after XML parsing. ENT_QUOTES is passed
            // explicitly so apostrophes decode the same way on every PHP version.
            $text = htmlspecialchars_decode((string) $textElement, ENT_QUOTES);
            // Collapse whitespace runs to one space, then strip the ends.
            $text = trim(preg_replace('/\s+/', ' ', $text));

            $transcriptArray[] = [
                'start'    => $start,
                'duration' => $dur,
                'text'     => $text,
            ];

            // Empty cues stay in the array but must not add extra spaces to the text.
            if ($text !== '') {
                $textParts[] = $text;
            }
        }

        // Return both the array and the cleaned string
        return [
            'array' => $transcriptArray,
            'text'  => implode(' ', $textParts),
        ];
    }

    /**
     * Fetch the raw caption data of the first caption track, retrying on failure.
     *
     * A 403 answer from the page is returned immediately without retrying. Any
     * Exception (failed request, no caption tracks) triggers a retry after a
     * two-second pause until the attempts are used up.
     *
     * @param string $videoUrl URL of the page to download.
     * @param int    $retries  Maximum number of attempts; values below 1 are
     *                         treated as 1 so a result is always returned.
     *
     * @return array|string Raw caption response body on success, otherwise
     *                      `['error' => ..., 'status_code' => 403|500]`.
     */
    public function getTranscript2($videoUrl, $retries = 3)
    {
        $maxAttempts = max(1, (int) $retries);
        $lastError = null;

        for ($attempt = 1; $attempt <= $maxAttempts; $attempt++) {
            try {
                $page = $this->fetchUrl($videoUrl);

                // Check for IP block (e.g., status code 403)
                if ($page['status'] === 403) {
                    return ['error' => 'IP blocked', 'status_code' => 403];
                }

                $captionTracks = $this->extractCaptionTracks($page['body']);
                if ($captionTracks === null) {
                    throw new Exception('No captionTracks found in the response.');
                }
                if (!isset($captionTracks[0]['baseUrl'])) {
                    throw new Exception('No caption tracks found.');
                }

                return $this->fetchCaptions($captionTracks[0]['baseUrl']);
            } catch (Exception $e) {
                $lastError = $e->getMessage();
                // Pause only when another attempt will follow.
                if ($attempt < $maxAttempts) {
                    sleep(2);
                }
            }
        }

        return ['error' => $lastError, 'status_code' => 500];
    }

    /**
     * Perform a GET request and return the body together with the HTTP status.
     *
     * @param string $url URL to request.
     *
     * @return array `['body' => string, 'status' => int]`
     *
     * @throws Exception When cURL cannot be initialised or the transfer fails.
     */
    private function fetchUrl($url)
    {
        $ch = curl_init();
        if ($ch === false) {
            throw new Exception('Unable to initialise cURL.');
        }

        try {
            curl_setopt($ch, CURLOPT_URL, $url);
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
            curl_setopt($ch, CURLOPT_TIMEOUT, 30);
            // The URL comes from the caller; restrict to web schemes so that
            // file://, gopher:// and similar cannot be requested.
            curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);

            $body = curl_exec($ch);
            if ($body === false) {
                throw new Exception('cURL request failed: ' . curl_error($ch));
            }

            return [
                'body'   => $body,
                'status' => (int) curl_getinfo($ch, CURLINFO_HTTP_CODE),
            ];
        } finally {
            curl_close($ch);
        }
    }

    /**
     * Download the caption document behind a track's `baseUrl`.
     *
     * @param string $baseUrl Track URL as found in the page; HTML entities are decoded first.
     *
     * @return string Raw response body.
     *
     * @throws Exception When the request fails.
     */
    private function fetchCaptions($baseUrl)
    {
        $url = html_entity_decode((string) $baseUrl, ENT_QUOTES, 'UTF-8');
        $result = $this->fetchUrl($url);

        return $result['body'];
    }

    /**
     * Extract the decoded `captionTracks` list from a page body.
     *
     * @param string $html Page body to search.
     *
     * @return array|null Null when the `"captionTracks":[...]` fragment is absent;
     *                    an empty array when it is present but is not valid JSON.
     */
    private function extractCaptionTracks($html)
    {
        $matches = [];
        if (preg_match('/"captionTracks":(\[.*?\])/', (string) $html, $matches) !== 1) {
            return null;
        }

        $tracks = json_decode($matches[1], true);

        return is_array($tracks) ? $tracks : [];
    }
}
