Spaces:

Daksh0505
/

Youtube-Chatbot

Running

App Files Community

Daksh0505 committed on Oct 6

Commit

16e31d5

verified ·

1 Parent(s): 34739a6

Update app.py

Browse files

Files changed (1) hide show

app.py +39 -37

app.py CHANGED Viewed

@@ -7,56 +7,57 @@ from youtube_transcript_api import YouTubeTranscriptApi
 import requests
 import os
 # 🔑 Environment variables
 api_key = os.getenv("HF_API_KEY")
 RAPIDAPI_KEY = (os.getenv("RAPIDAPI_KEY") or "").strip()
-# 📋 List Available Languages (RapidAPI → fallback YouTube)
 @st.cache_data
 def list_available_languages(video_id):
-    """List available transcript languages."""
     try:
-        ytt_api = YouTubeTranscriptApi()
-        transcript_list = ytt_api.list(video_id)
-        languages = []
-        for transcript in transcript_list:
             lang_code = transcript.language_code
             lang_name = transcript.language
             is_generated = transcript.is_generated
-            label = f"{lang_name} ({lang_code})" + (" - Auto-generated" if is_generated else " - Manual")
             languages.append((lang_code, label))
         return languages
     except Exception as e:
         st.warning(f"YouTubeTranscriptApi failed to list: {e}")
         return [("en", "English (en) - Default")]
 @st.cache_data
 def get_transcript_youtube(video_id, language_code="en"):
-    """Fetch transcript via YouTubeTranscriptApi."""
     try:
-        ytt_api = YouTubeTranscriptApi()
-        transcript_list = ytt_api.get_transcript(video_id, languages=[language_code])
-        transcript = " ".join([t["text"] for t in transcript_list])
         return transcript
     except Exception as e:
         st.warning(f"YouTubeTranscriptApi failed: {e}")
         return None
 @st.cache_data
 def get_transcript_rapidapi(video_id, language_code="en"):
-    """Fetch transcript via RapidAPI fallback."""
     try:
         url = "https://youtube-transcript3.p.rapidapi.com/"
-        querystring = {"id": video_id, "lang": language_code}
         headers = {
             "x-rapidapi-key": RAPIDAPI_KEY,
             "x-rapidapi-host": "youtube-transcript3.p.rapidapi.com"
         }
-        response = requests.get(url, headers=headers, params=querystring)
         response.raise_for_status()
         data = response.json()
         transcript = " ".join([item["text"] for item in data.get("transcript", [])])
@@ -65,9 +66,9 @@ def get_transcript_rapidapi(video_id, language_code="en"):
         st.error(f"RapidAPI transcript fetch failed: {e}")
         return None
-# 🧱 Vector Store
 @st.cache_data
 def create_vector_store(transcript):
     splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
@@ -78,8 +79,9 @@ def create_vector_store(transcript):
     )
     return FAISS.from_documents(docs, embeddings)
-# 🧩 Build Model
 def build_model(model_choice, temperature=0.7):
     if model_choice == "Flan-T5 (Free)":
         llm = HuggingFaceEndpoint(
@@ -89,7 +91,6 @@ def build_model(model_choice, temperature=0.7):
             temperature=temperature
         )
         return ChatHuggingFace(llm=llm)
     elif model_choice == "DeepSeek":
         llm = HuggingFaceEndpoint(
             repo_id="deepseek-ai/DeepSeek-V3.2-Exp",
@@ -98,7 +99,6 @@ def build_model(model_choice, temperature=0.7):
             max_new_tokens=500
         )
         return ChatHuggingFace(llm=llm, temperature=temperature)
     elif model_choice == "OpenAI":
         llm = HuggingFaceEndpoint(
             repo_id="openai/gpt-oss-20b",
@@ -108,8 +108,9 @@ def build_model(model_choice, temperature=0.7):
         )
         return ChatHuggingFace(llm=llm, temperature=temperature)
-# 🧾 Prompt Template
 prompt_template = PromptTemplate(
     template=(
         "Answer the question based on the context below.\n\n"
@@ -120,8 +121,9 @@ prompt_template = PromptTemplate(
     input_variables=["context", "question"]
 )
-# 🚀 Streamlit UI
 st.title("🎥 YouTube Transcript Chatbot")
 video_id = st.text_input("🎬 YouTube Video ID", value="lv1_-RER4_I")
@@ -131,23 +133,23 @@ temperature = st.slider("🔥 Temperature", 0, 100, value=50) / 100.0
 source_choice = st.radio(
     "📜 Transcript Source",
-    ["Auto (Try RapidAPI, then YouTubeTranscriptApi)", "RapidAPI", "YouTubeTranscriptApi"]
 )
 if video_id:
     with st.spinner("🔎 Checking available transcript languages..."):
         available_langs = list_available_languages(video_id)
     if available_langs:
-        st.success(f"Found {len(available_langs)} available transcript(s)")
         lang_options = {label: code for code, label in available_langs}
         selected_label = st.selectbox("🌐 Select Transcript Language", options=list(lang_options.keys()))
         language_code = lang_options[selected_label]
     else:
         st.warning("No transcripts found for this video.")
-        language_code = None
-else:
-    language_code = None
 if st.button("🚀 Run Chatbot"):
     if not video_id or not query or not language_code:
         st.warning("Please provide video ID, query, and select a language.")
@@ -166,7 +168,7 @@ if st.button("🚀 Run Chatbot"):
             if not transcript:
                 st.error("❌ Could not fetch transcript from any source.")
             else:
-                st.success(f"✅ Transcript fetched successfully ({len(transcript)} characters).")
                 with st.spinner("⚙️ Generating response..."):
                     retriever = create_vector_store(transcript).as_retriever(search_type="mmr", search_kwargs={"k": 5})
@@ -179,12 +181,12 @@ if st.button("🚀 Run Chatbot"):
                     response_text = response.content if hasattr(response, 'content') else str(response)
                     st.text_area("🧩 Model Response", value=response_text, height=400)
-# 📘 Sidebar Info
 with st.sidebar:
     st.header("ℹ️ About this App")
     st.write("""
     - Uses both **RapidAPI** and **YouTubeTranscriptApi**
-    - Detects transcript languages dynamically (RapidAPI first)
     - RAG-based Q&A powered by Hugging Face models
     - Models supported: Flan-T5 (Free), DeepSeek, OpenAI (via HF)
     """)

 import requests
 import os
 # 🔑 Environment variables
 api_key = os.getenv("HF_API_KEY")
 RAPIDAPI_KEY = (os.getenv("RAPIDAPI_KEY") or "").strip()
+# -----------------------------
+# List Available Languages
+# -----------------------------
 @st.cache_data
 def list_available_languages(video_id):
+    """List available transcript languages using YouTubeTranscriptApi"""
+    languages = []
     try:
+        transcripts = YouTubeTranscriptApi.list(video_id)
+        for transcript in transcripts:
             lang_code = transcript.language_code
             lang_name = transcript.language
             is_generated = transcript.is_generated
+            label = f"{lang_name} ({lang_code})" + (" - Auto-generated" if is_generated else "")
             languages.append((lang_code, label))
         return languages
     except Exception as e:
         st.warning(f"YouTubeTranscriptApi failed to list: {e}")
         return [("en", "English (en) - Default")]
+# -----------------------------
+# Fetch transcripts
+# -----------------------------
 @st.cache_data
 def get_transcript_youtube(video_id, language_code="en"):
+    """Fetch transcript via YouTubeTranscriptApi using .list()"""
     try:
+        transcripts = YouTubeTranscriptApi.list(video_id)
+        transcript_obj = transcripts.find_transcript([language_code])
+        transcript_list = transcript_obj.fetch()
+        transcript = " ".join([t.text for t in transcript_list])
         return transcript
     except Exception as e:
         st.warning(f"YouTubeTranscriptApi failed: {e}")
         return None
 @st.cache_data
 def get_transcript_rapidapi(video_id, language_code="en"):
+    """Fetch transcript via RapidAPI"""
     try:
         url = "https://youtube-transcript3.p.rapidapi.com/"
+        querystring = {"id": video_id, "lang": language_code}  # ✅ correct param is "id"
         headers = {
             "x-rapidapi-key": RAPIDAPI_KEY,
             "x-rapidapi-host": "youtube-transcript3.p.rapidapi.com"
         }
+        response = requests.get(url, headers=headers, params=querystring, timeout=20)
         response.raise_for_status()
         data = response.json()
         transcript = " ".join([item["text"] for item in data.get("transcript", [])])
         st.error(f"RapidAPI transcript fetch failed: {e}")
         return None
+# -----------------------------
+# Vector Store
+# -----------------------------
 @st.cache_data
 def create_vector_store(transcript):
     splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
     )
     return FAISS.from_documents(docs, embeddings)
+# -----------------------------
+# Build Model
+# -----------------------------
 def build_model(model_choice, temperature=0.7):
     if model_choice == "Flan-T5 (Free)":
         llm = HuggingFaceEndpoint(
             temperature=temperature
         )
         return ChatHuggingFace(llm=llm)
     elif model_choice == "DeepSeek":
         llm = HuggingFaceEndpoint(
             repo_id="deepseek-ai/DeepSeek-V3.2-Exp",
             max_new_tokens=500
         )
         return ChatHuggingFace(llm=llm, temperature=temperature)
     elif model_choice == "OpenAI":
         llm = HuggingFaceEndpoint(
             repo_id="openai/gpt-oss-20b",
         )
         return ChatHuggingFace(llm=llm, temperature=temperature)
+# -----------------------------
+# Prompt Template
+# -----------------------------
 prompt_template = PromptTemplate(
     template=(
         "Answer the question based on the context below.\n\n"
     input_variables=["context", "question"]
 )
+# -----------------------------
+# Streamlit UI
+# -----------------------------
 st.title("🎥 YouTube Transcript Chatbot")
 video_id = st.text_input("🎬 YouTube Video ID", value="lv1_-RER4_I")
 source_choice = st.radio(
     "📜 Transcript Source",
+    ["Auto (RapidAPI → YouTubeTranscriptApi)", "RapidAPI", "YouTubeTranscriptApi"]
 )
+# Select language
+language_code = None
 if video_id:
     with st.spinner("🔎 Checking available transcript languages..."):
         available_langs = list_available_languages(video_id)
     if available_langs:
+        st.success(f"Found {len(available_langs)} transcript(s)")
         lang_options = {label: code for code, label in available_langs}
         selected_label = st.selectbox("🌐 Select Transcript Language", options=list(lang_options.keys()))
         language_code = lang_options[selected_label]
     else:
         st.warning("No transcripts found for this video.")
+# Fetch transcript & answer
 if st.button("🚀 Run Chatbot"):
     if not video_id or not query or not language_code:
         st.warning("Please provide video ID, query, and select a language.")
             if not transcript:
                 st.error("❌ Could not fetch transcript from any source.")
             else:
+                st.success(f"✅ Transcript fetched ({len(transcript)} characters).")
                 with st.spinner("⚙️ Generating response..."):
                     retriever = create_vector_store(transcript).as_retriever(search_type="mmr", search_kwargs={"k": 5})
                     response_text = response.content if hasattr(response, 'content') else str(response)
                     st.text_area("🧩 Model Response", value=response_text, height=400)
+# Sidebar
 with st.sidebar:
     st.header("ℹ️ About this App")
     st.write("""
     - Uses both **RapidAPI** and **YouTubeTranscriptApi**
+    - Correctly detects transcript languages dynamically
     - RAG-based Q&A powered by Hugging Face models
     - Models supported: Flan-T5 (Free), DeepSeek, OpenAI (via HF)
     """)