harshith1411 committed
Commit e73bf6b · verified · 1 Parent(s): 206cf28

Update app.py

Files changed (1)
  1. app.py +93 -40
app.py CHANGED
@@ -1,43 +1,96 @@
- import cv2
- import os
  import torch
- from transformers import pipeline
- from moviepy.editor import VideoFileClip
-
- def extract_keyframes(video_path, interval=5):
-     cap = cv2.VideoCapture(video_path)
-     frame_rate = int(cap.get(cv2.CAP_PROP_FPS))
-     keyframes = []
-     frame_count = 0

-     while cap.isOpened():
-         ret, frame = cap.read()
-         if not ret:
-             break
-         if frame_count % (frame_rate * interval) == 0:
-             keyframes.append(frame)
-         frame_count += 1

-     cap.release()
-     return keyframes
-
- def generate_captions(frames):
-     caption_generator = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
-     captions = [caption_generator(frame)[0]['generated_text'] for frame in frames]
-     return captions
-
- def summarize_text(texts):
-     summarizer = pipeline("summarization")
-     summary = summarizer(" ".join(texts), max_length=50, min_length=10, do_sample=False)
-     return summary[0]['summary_text']
-
- def summarize_video(video_path):
-     frames = extract_keyframes(video_path)
-     captions = generate_captions(frames)
-     summary = summarize_text(captions)
-     return summary
-
- if __name__ == "__main__":
-     video_path = "input.mp4"  # Change to your video file
-     summary = summarize_video(video_path)
-     print("Video Summary:", summary)

+ import gradio as gr
  import torch
+ import yt_dlp
+ import os
+ import subprocess
+ import json
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ import moviepy.editor as mp
+ import langdetect
+ import uuid
+
+ # Load model
+ model_path = "Qwen/Qwen2.5-7B-Instruct"
+ tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+ model = AutoModelForCausalLM.from_pretrained(
+     model_path, torch_dtype=torch.float16, trust_remote_code=True
+ ).cuda().eval()
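+ # Note: fp16 weights for a 7B model need roughly 15 GB of GPU memory; loading
+ # with device_map="auto" (requires accelerate) is an alternative to .cuda().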
+
+ def generate_unique_filename(extension):
+     return f"{uuid.uuid4()}{extension}"
+
+ def download_youtube_audio(url):
+     # Use an extension placeholder so yt-dlp names the raw download correctly;
+     # the FFmpegExtractAudio post-processor then writes <base_name>.wav.
+     base_name = str(uuid.uuid4())
+     ydl_opts = {
+         'format': 'bestaudio/best',
+         'postprocessors': [{'key': 'FFmpegExtractAudio', 'preferredcodec': 'wav'}],
+         'outtmpl': f"{base_name}.%(ext)s",
+         'keepvideo': False,
+     }
+     with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+         ydl.download([url])
+     return f"{base_name}.wav"
+
+ def transcribe_audio(file_path):
+     output_file = generate_unique_filename(".json")
+     command = [
+         "insanely-fast-whisper",
+         "--file-name", file_path,
+         "--device-id", "0",
+         "--model-name", "openai/whisper-large-v3",
+         "--task", "transcribe",
+         "--timestamp", "chunk",
+         "--transcript-path", output_file
+     ]
+     subprocess.run(command, check=True)
+     with open(output_file, "r") as f:
+         transcription = json.load(f)
+     os.remove(output_file)
+     return transcription.get("text", "")
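+ # Assumes the insanely-fast-whisper CLI (pip install insanely-fast-whisper) and
+ # ffmpeg are on PATH; the CLI writes the transcript JSON to --transcript-path.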
+
+ def generate_summary(transcription):
+     if not transcription.strip():
+         return "Error: No transcription available to summarize."
+     detected_language = langdetect.detect(transcription)
+     prompt = f"Summarize the following text in the detected language ({detected_language}):\n{transcription[:1000]}"
+     # Qwen2.5-Instruct has no .chat() helper; apply the chat template and generate.
+     messages = [{"role": "user", "content": prompt}]
+     text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+     inputs = tokenizer([text], return_tensors="pt").to(model.device)
+     output_ids = model.generate(**inputs, max_new_tokens=256)
+     return tokenizer.decode(output_ids[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
+
+ def process_youtube(url):
+     if not url:
+         return "Error: Please enter a valid YouTube URL.", ""
+     try:
+         audio_file = download_youtube_audio(url)
+         transcription = transcribe_audio(audio_file)
+         os.remove(audio_file)
+         return transcription, ""
+     except Exception as e:
+         return f"Error processing YouTube video: {e}", ""
+
+ def process_uploaded_video(video_path):
+     try:
+         # The whisper CLI reads the audio track straight from the video container.
+         transcription = transcribe_audio(video_path)
+         return transcription, ""
+     except Exception as e:
+         return f"Error processing video: {e}", ""
+
+ demo = gr.Blocks()
+ with demo:
+     gr.Markdown("## 🎥 Video Summarization Tool")
+     with gr.Tabs():
+         with gr.TabItem("📤 Upload Video"):
+             video_input = gr.File(label="Video file")
+             video_button = gr.Button("Process Video")
+         with gr.TabItem("🔗 YouTube Link"):
+             url_input = gr.Textbox(label="YouTube URL")
+             url_button = gr.Button("Process URL")

+     transcription_output = gr.Textbox(label="Transcription", lines=10)
+     summary_output = gr.Textbox(label="Summary", lines=10)
+     summary_button = gr.Button("Generate Summary")

+     video_button.click(process_uploaded_video, inputs=[video_input], outputs=[transcription_output, summary_output])
+     url_button.click(process_youtube, inputs=[url_input], outputs=[transcription_output, summary_output])
+     summary_button.click(generate_summary, inputs=[transcription_output], outputs=[summary_output])
+
+ # Guard the launch so the module can be imported without starting the UI.
+ if __name__ == "__main__":
+     demo.launch()
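
For local testing, a minimal smoke test could look like the sketch below. It assumes app.py is importable from the working directory, a CUDA GPU with enough memory for the fp16 7B model, and that ffmpeg plus the yt-dlp and insanely-fast-whisper packages are installed; the video URL is a placeholder.

import os
from app import download_youtube_audio, transcribe_audio, generate_summary

audio_path = download_youtube_audio("https://www.youtube.com/watch?v=VIDEO_ID")
transcript = transcribe_audio(audio_path)
os.remove(audio_path)
print("Summary:", generate_summary(transcript))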