Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -14,6 +14,8 @@ from pytube import YouTube
|
|
| 14 |
import requests
|
| 15 |
import logging
|
| 16 |
import os
|
|
|
|
|
|
|
| 17 |
nltk.download('punkt')
|
| 18 |
nltk.download('stopwords')
|
| 19 |
|
|
@@ -27,7 +29,6 @@ class VideoAnalytics:
|
|
| 27 |
def __init__(self):
|
| 28 |
"""
|
| 29 |
Initialize the VideoAnalytics object.
|
| 30 |
-
|
| 31 |
Args:
|
| 32 |
hf_token (str): Hugging Face API token.
|
| 33 |
"""
|
|
@@ -39,16 +40,7 @@ class VideoAnalytics:
|
|
| 39 |
# Initialize transcribed text variable
|
| 40 |
self.transcribed_text = ""
|
| 41 |
|
| 42 |
-
|
| 43 |
-
self.API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3"
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
hf_token = os.getenv('HF_TOKEN')
|
| 47 |
-
# Placeholder for Hugging Face API token
|
| 48 |
-
self.hf_token = hf_token # Replace this with the actual Hugging Face API token
|
| 49 |
-
|
| 50 |
-
# Set headers for API requests with Hugging Face token
|
| 51 |
-
self.headers = {"Authorization": f"Bearer {self.hf_token}"}
|
| 52 |
|
| 53 |
# Initialize english text variable
|
| 54 |
self.english_text = ""
|
|
@@ -61,13 +53,86 @@ class VideoAnalytics:
|
|
| 61 |
# Configure logging settings
|
| 62 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
| 63 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
def transcribe_video(self, vid: str) -> str:
|
| 65 |
"""
|
| 66 |
Transcribe the audio of the video.
|
| 67 |
-
|
| 68 |
Args:
|
| 69 |
vid (str): Path to the video file.
|
| 70 |
-
|
| 71 |
Returns:
|
| 72 |
str: Transcribed text.
|
| 73 |
"""
|
|
@@ -78,28 +143,23 @@ class VideoAnalytics:
|
|
| 78 |
|
| 79 |
# Write audio to a temporary file
|
| 80 |
audio.write_audiofile("output_audio.mp3")
|
| 81 |
-
audio_file = open("output_audio.mp3", "rb")
|
| 82 |
|
| 83 |
-
# Define a helper function to query the Hugging Face model
|
| 84 |
-
def query(data):
|
| 85 |
-
response = requests.post(self.API_URL, headers=self.headers, data=data)
|
| 86 |
-
return response.json()
|
| 87 |
|
| 88 |
-
#
|
| 89 |
-
|
| 90 |
|
| 91 |
-
|
| 92 |
# Update the transcribed_text attribute with the transcription result
|
| 93 |
-
self.transcribed_text =
|
| 94 |
# Update the translation text into english_text
|
| 95 |
self.english_text = self.translation()
|
| 96 |
# Return the transcribed text
|
| 97 |
-
return
|
| 98 |
|
| 99 |
except Exception as e:
|
| 100 |
logging.error(f"Error transcribing video: {e}")
|
| 101 |
return ""
|
| 102 |
-
|
| 103 |
def generate_video_summary(self) -> str:
|
| 104 |
"""
|
| 105 |
Generate a summary of the transcribed video.
|
|
@@ -365,10 +425,30 @@ class VideoAnalytics:
|
|
| 365 |
# Log any errors that occur during initialization of YouTube object
|
| 366 |
logging.error(f"Error downloading video: {e}")
|
| 367 |
return ""
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 372 |
|
| 373 |
def main(self, video: str = None, input_path: str = None) -> tuple:
|
| 374 |
"""
|
|
@@ -386,7 +466,7 @@ class VideoAnalytics:
|
|
| 386 |
video_ = VideoFileClip(input_path)
|
| 387 |
duration = video_.duration
|
| 388 |
video_.close()
|
| 389 |
-
if round(duration) <= 600:
|
| 390 |
text = self.transcribe_video(input_path)
|
| 391 |
else:
|
| 392 |
return "Video Duration Above 10 Minutes,Try Below 10 Minutes Video","","",None,None,None
|
|
@@ -394,7 +474,7 @@ class VideoAnalytics:
|
|
| 394 |
video_ = VideoFileClip(video)
|
| 395 |
duration = video_.duration
|
| 396 |
video_.close()
|
| 397 |
-
if round(duration) <= 600:
|
| 398 |
text = self.transcribe_video(video)
|
| 399 |
input_path = video
|
| 400 |
else:
|
|
|
|
| 14 |
import requests
|
| 15 |
import logging
|
| 16 |
import os
|
| 17 |
+
from pydub import AudioSegment
|
| 18 |
+
import speech_recognition as sr
|
| 19 |
nltk.download('punkt')
|
| 20 |
nltk.download('stopwords')
|
| 21 |
|
|
|
|
| 29 |
def __init__(self):
|
| 30 |
"""
|
| 31 |
Initialize the VideoAnalytics object.
|
|
|
|
| 32 |
Args:
|
| 33 |
hf_token (str): Hugging Face API token.
|
| 34 |
"""
|
|
|
|
| 40 |
# Initialize transcribed text variable
|
| 41 |
self.transcribed_text = ""
|
| 42 |
|
| 43 |
+
self.r = sr.Recognizer()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
|
| 45 |
# Initialize english text variable
|
| 46 |
self.english_text = ""
|
|
|
|
| 53 |
# Configure logging settings
|
| 54 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
| 55 |
|
| 56 |
+
def mp3_to_wav(self, mp3_file: str, wav_file: str) -> str:
    """
    Re-encode an MP3 audio file as WAV.

    Args:
        mp3_file (str): Location of the MP3 file to read.
        wav_file (str): Destination path for the WAV output.

    Returns:
        str: The `wav_file` path that was passed in, once the export has finished.

    Raises:
        Exception: Any error raised while loading or exporting is logged and re-raised.
    """
    try:
        # Decode the MP3 and write it straight back out in WAV format
        AudioSegment.from_mp3(mp3_file).export(wav_file, format="wav")
        logging.info(f"MP3 file '{mp3_file}' converted to WAV successfully: {wav_file}")
        return wav_file
    except Exception as err:
        # Record the failure, then hand the same exception back to the caller
        logging.error(f"Error occurred while converting MP3 to WAV: {err}")
        raise
|
| 84 |
+
|
| 85 |
+
# Function to recognize speech in the audio file
|
| 86 |
+
def transcribe_audio(self, path: str) -> str:
    """
    Transcribe speech from an audio file.

    Args:
        path (str): Path to an audio file that `sr.AudioFile` can open.

    Returns:
        str: The recognized text, or an empty string when the audio could not
        be understood or the recognition request failed.
    """
    try:
        with sr.AudioFile(path) as source:
            # Use the recognizer stored on the instance; a bare `r` is not
            # defined in this scope and would raise NameError.
            audio_listened = self.r.record(source)
        # The recorded audio is already in memory, so the file can be closed
        # before the recognition request is sent.
        return self.r.recognize_google(audio_listened)
    except sr.UnknownValueError as e:
        logging.error(f"Speech recognition could not understand audio: {e}")
        return ""
    except sr.RequestError as e:
        logging.error(f"Could not request results from Google Speech Recognition service: {e}")
        return ""
|
| 99 |
+
|
| 100 |
+
# Function to split the audio file into chunks on silence and apply speech recognition
|
| 101 |
+
def get_large_audio_transcription_on_silence(self, path: str) -> str:
    """
    Split a large audio file into chunks on silence and transcribe each chunk.

    Each chunk is exported as a WAV file under the "audio-chunks" folder and
    passed to `self.transcribe_audio`. Non-empty results are capitalized,
    terminated with ". " and concatenated in chunk order.

    Args:
        path (str): Path to the audio file to split and transcribe.

    Returns:
        str: The concatenated transcription, or an empty string if any step
        raised an exception (the error is logged, not propagated).
    """
    try:
        # Imported here so the method does not depend on a module-level import
        # of split_on_silence; kept inside the try so a failure is logged and
        # reported as "" like every other error in this method.
        from pydub.silence import split_on_silence

        sound = AudioSegment.from_file(path)
        chunks = split_on_silence(
            sound,
            min_silence_len=500,
            silence_thresh=sound.dBFS - 14,
            keep_silence=500,
        )

        folder_name = "audio-chunks"
        # exist_ok avoids the check-then-create race of isdir() + mkdir()
        os.makedirs(folder_name, exist_ok=True)

        sentences = []
        for i, audio_chunk in enumerate(chunks, start=1):
            chunk_filename = os.path.join(folder_name, f"chunk{i}.wav")
            audio_chunk.export(chunk_filename, format="wav")

            text = self.transcribe_audio(chunk_filename)

            if text:
                text = f"{text.capitalize()}. "
                logging.info(f"Transcribed {chunk_filename}: {text}")
                sentences.append(text)
            else:
                logging.warning(f"No speech recognized in {chunk_filename}")

        return "".join(sentences)
    except Exception as e:
        logging.error(f"Error processing audio: {e}")
        return ""
|
| 130 |
+
|
| 131 |
def transcribe_video(self, vid: str) -> str:
|
| 132 |
"""
|
| 133 |
Transcribe the audio of the video.
|
|
|
|
| 134 |
Args:
|
| 135 |
vid (str): Path to the video file.
|
|
|
|
| 136 |
Returns:
|
| 137 |
str: Transcribed text.
|
| 138 |
"""
|
|
|
|
| 143 |
|
| 144 |
# Write audio to a temporary file
|
| 145 |
audio.write_audiofile("output_audio.mp3")
|
|
|
|
| 146 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 147 |
|
| 148 |
+
# Replace 'input.mp3' and 'output.wav' with your file paths
|
| 149 |
+
audio_filename = self.mp3_to_wav("output_audio.mp3", 'output.wav')
|
| 150 |
|
| 151 |
+
text = self.get_large_audio_transcription_on_silence(audio_filename)
|
| 152 |
# Update the transcribed_text attribute with the transcription result
|
| 153 |
+
self.transcribed_text = text
|
| 154 |
# Update the translation text into english_text
|
| 155 |
self.english_text = self.translation()
|
| 156 |
# Return the transcribed text
|
| 157 |
+
return text
|
| 158 |
|
| 159 |
except Exception as e:
|
| 160 |
logging.error(f"Error transcribing video: {e}")
|
| 161 |
return ""
|
| 162 |
+
|
| 163 |
def generate_video_summary(self) -> str:
|
| 164 |
"""
|
| 165 |
Generate a summary of the transcribed video.
|
|
|
|
| 425 |
# Log any errors that occur during initialization of YouTube object
|
| 426 |
logging.error(f"Error downloading video: {e}")
|
| 427 |
return ""
|
| 428 |
+
|
| 429 |
+
def save_audio_with_gtts(self, text: str, filename: str) -> str:
    """
    Convert text to speech with gTTS and write the audio to disk.

    Args:
        text (str): The text to be spoken.
        filename (str): Destination path (including directory) for the audio file.

    Returns:
        str: The `filename` that was passed in, once the audio has been saved.

    Raises:
        Exception: Any error raised while synthesizing or saving is logged and re-raised.
    """
    try:
        # Build the English speech object and save it in a single step
        gTTS(text=text, lang='en').save(filename)
        logging.info(f"Audio file saved successfully: {filename}")
        return filename
    except Exception as err:
        # Record the failure, then hand the same exception back to the caller
        logging.error(f"Error occurred while saving audio: {err}")
        raise
|
| 452 |
|
| 453 |
def main(self, video: str = None, input_path: str = None) -> tuple:
|
| 454 |
"""
|
|
|
|
| 466 |
video_ = VideoFileClip(input_path)
|
| 467 |
duration = video_.duration
|
| 468 |
video_.close()
|
| 469 |
+
if round(duration) <= 6*600:
|
| 470 |
text = self.transcribe_video(input_path)
|
| 471 |
else:
|
| 472 |
return "Video Duration Above 10 Minutes,Try Below 10 Minutes Video","","",None,None,None
|
|
|
|
| 474 |
video_ = VideoFileClip(video)
|
| 475 |
duration = video_.duration
|
| 476 |
video_.close()
|
| 477 |
+
if round(duration) <= 6*600:
|
| 478 |
text = self.transcribe_video(video)
|
| 479 |
input_path = video
|
| 480 |
else:
|