Daksh0505 committed on
Commit 16e31d5 · verified · 1 Parent(s): 34739a6

Update app.py

Files changed (1):
  1. app.py +39 -37
app.py CHANGED
@@ -7,56 +7,57 @@ from youtube_transcript_api import YouTubeTranscriptApi
 import requests
 import os
 
-
 # 🔑 Environment variables
 api_key = os.getenv("HF_API_KEY")
 RAPIDAPI_KEY = (os.getenv("RAPIDAPI_KEY") or "").strip()
 
-
-# 📋 List Available Languages (RapidAPI → fallback YouTube)
+# -----------------------------
+# List Available Languages
+# -----------------------------
 @st.cache_data
 def list_available_languages(video_id):
-    """List available transcript languages."""
+    """List available transcript languages using YouTubeTranscriptApi"""
+    languages = []
     try:
-        ytt_api = YouTubeTranscriptApi()
-        transcript_list = ytt_api.list(video_id)
-        languages = []
-        for transcript in transcript_list:
+        transcripts = YouTubeTranscriptApi.list(video_id)
+        for transcript in transcripts:
             lang_code = transcript.language_code
             lang_name = transcript.language
             is_generated = transcript.is_generated
-            label = f"{lang_name} ({lang_code})" + (" - Auto-generated" if is_generated else " - Manual")
+            label = f"{lang_name} ({lang_code})" + (" - Auto-generated" if is_generated else "")
             languages.append((lang_code, label))
         return languages
     except Exception as e:
         st.warning(f"YouTubeTranscriptApi failed to list: {e}")
         return [("en", "English (en) - Default")]
 
-
+# -----------------------------
+# Fetch transcripts
+# -----------------------------
 @st.cache_data
 def get_transcript_youtube(video_id, language_code="en"):
-    """Fetch transcript via YouTubeTranscriptApi."""
+    """Fetch transcript via YouTubeTranscriptApi using .list()"""
     try:
-        ytt_api = YouTubeTranscriptApi()
-        transcript_list = ytt_api.get_transcript(video_id, languages=[language_code])
-        transcript = " ".join([t["text"] for t in transcript_list])
+        transcripts = YouTubeTranscriptApi.list(video_id)
+        transcript_obj = transcripts.find_transcript([language_code])
+        transcript_list = transcript_obj.fetch()
+        transcript = " ".join([t.text for t in transcript_list])
         return transcript
     except Exception as e:
         st.warning(f"YouTubeTranscriptApi failed: {e}")
         return None
 
-
 @st.cache_data
 def get_transcript_rapidapi(video_id, language_code="en"):
-    """Fetch transcript via RapidAPI fallback."""
+    """Fetch transcript via RapidAPI"""
     try:
         url = "https://youtube-transcript3.p.rapidapi.com/"
-        querystring = {"id": video_id, "lang": language_code}
+        querystring = {"id": video_id, "lang": language_code}  # ✅ correct param is "id"
         headers = {
             "x-rapidapi-key": RAPIDAPI_KEY,
             "x-rapidapi-host": "youtube-transcript3.p.rapidapi.com"
         }
-        response = requests.get(url, headers=headers, params=querystring)
+        response = requests.get(url, headers=headers, params=querystring, timeout=20)
         response.raise_for_status()
         data = response.json()
         transcript = " ".join([item["text"] for item in data.get("transcript", [])])
@@ -65,9 +66,9 @@ def get_transcript_rapidapi(video_id, language_code="en"):
         st.error(f"RapidAPI transcript fetch failed: {e}")
         return None
 
-
-
-# 🧱 Vector Store
+# -----------------------------
+# Vector Store
+# -----------------------------
 @st.cache_data
 def create_vector_store(transcript):
     splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
@@ -78,8 +79,9 @@ def create_vector_store(transcript):
     )
     return FAISS.from_documents(docs, embeddings)
 
-
-# 🧩 Build Model
+# -----------------------------
+# Build Model
+# -----------------------------
 def build_model(model_choice, temperature=0.7):
     if model_choice == "Flan-T5 (Free)":
         llm = HuggingFaceEndpoint(
@@ -89,7 +91,6 @@
             temperature=temperature
         )
         return ChatHuggingFace(llm=llm)
-
     elif model_choice == "DeepSeek":
         llm = HuggingFaceEndpoint(
             repo_id="deepseek-ai/DeepSeek-V3.2-Exp",
@@ -98,7 +99,6 @@
             max_new_tokens=500
         )
         return ChatHuggingFace(llm=llm, temperature=temperature)
-
    elif model_choice == "OpenAI":
         llm = HuggingFaceEndpoint(
             repo_id="openai/gpt-oss-20b",
@@ -108,8 +108,9 @@
         )
         return ChatHuggingFace(llm=llm, temperature=temperature)
 
-
-# 🧾 Prompt Template
+# -----------------------------
+# Prompt Template
+# -----------------------------
 prompt_template = PromptTemplate(
     template=(
         "Answer the question based on the context below.\n\n"
@@ -120,8 +121,9 @@ prompt_template = PromptTemplate(
     input_variables=["context", "question"]
 )
 
-
-# 🚀 Streamlit UI
+# -----------------------------
+# Streamlit UI
+# -----------------------------
 st.title("🎥 YouTube Transcript Chatbot")
 
 video_id = st.text_input("🎬 YouTube Video ID", value="lv1_-RER4_I")
@@ -131,23 +133,23 @@ temperature = st.slider("🔥 Temperature", 0, 100, value=50) / 100.0
 
 source_choice = st.radio(
     "📜 Transcript Source",
-    ["Auto (Try RapidAPI, then YouTubeTranscriptApi)", "RapidAPI", "YouTubeTranscriptApi"]
+    ["Auto (RapidAPI → YouTubeTranscriptApi)", "RapidAPI", "YouTubeTranscriptApi"]
 )
 
+# Select language
+language_code = None
 if video_id:
     with st.spinner("🔎 Checking available transcript languages..."):
         available_langs = list_available_languages(video_id)
     if available_langs:
-        st.success(f"Found {len(available_langs)} available transcript(s)")
+        st.success(f"Found {len(available_langs)} transcript(s)")
         lang_options = {label: code for code, label in available_langs}
         selected_label = st.selectbox("🌐 Select Transcript Language", options=list(lang_options.keys()))
         language_code = lang_options[selected_label]
     else:
         st.warning("No transcripts found for this video.")
-        language_code = None
-else:
-    language_code = None
 
+# Fetch transcript & answer
 if st.button("🚀 Run Chatbot"):
     if not video_id or not query or not language_code:
         st.warning("Please provide video ID, query, and select a language.")
@@ -166,7 +168,7 @@ if st.button("🚀 Run Chatbot"):
         if not transcript:
             st.error("❌ Could not fetch transcript from any source.")
         else:
-            st.success(f"✅ Transcript fetched successfully ({len(transcript)} characters).")
+            st.success(f"✅ Transcript fetched ({len(transcript)} characters).")
 
             with st.spinner("⚙️ Generating response..."):
                 retriever = create_vector_store(transcript).as_retriever(search_type="mmr", search_kwargs={"k": 5})
@@ -179,12 +181,12 @@ if st.button("🚀 Run Chatbot"):
                 response_text = response.content if hasattr(response, 'content') else str(response)
                 st.text_area("🧩 Model Response", value=response_text, height=400)
 
-# 📘 Sidebar Info
+# Sidebar
 with st.sidebar:
     st.header("ℹ️ About this App")
     st.write("""
     - Uses both **RapidAPI** and **YouTubeTranscriptApi**
-    - Detects transcript languages dynamically (RapidAPI first)
+    - Correctly detects transcript languages dynamically
     - RAG-based Q&A powered by Hugging Face models
     - Models supported: Flan-T5 (Free), DeepSeek, OpenAI (via HF)
     """)
 