hebaadel committed on
Commit
552fdcc
·
verified ·
1 Parent(s): 1b0099c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -0
app.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from datasets import load_dataset
3
+ from transformers import pipeline
4
+ import soundfile as sf
5
+ import torch
6
+ import gradio as gr
7
+ import numpy as np
8
+
9
# Lazily created image-classification pipeline, shared by every request so the
# model is loaded once instead of on each button click.
_IMAGE_CLASSIFIER = None


def _get_image_classifier():
    """Return the shared image-classification pipeline, building it on first use."""
    global _IMAGE_CLASSIFIER
    if _IMAGE_CLASSIFIER is None:
        _IMAGE_CLASSIFIER = pipeline(
            task='image-classification',
            model='google/vit-base-patch16-224',
        )
    return _IMAGE_CLASSIFIER


def predict_image(image):
    """Classify an image and return the label of the first prediction.

    Args:
        image: The image to classify; it is handed to the pipeline unchanged.
            ``None`` means no image was provided.

    Returns:
        The ``'label'`` field of the first entry in the pipeline output, or
        an empty string when ``image`` is ``None``.
    """
    # Nothing to classify: skip loading/running the model entirely.
    if image is None:
        return ""
    predictions = _get_image_classifier()(image)
    return predictions[0]['label']
14
+
15
# Lazily created English-to-Arabic translation pipeline, shared across calls so
# the model is not reloaded for every request.
_EN_AR_TRANSLATOR = None


def _get_translator():
    """Return the shared English-to-Arabic translation pipeline, building it on first use."""
    global _EN_AR_TRANSLATOR
    if _EN_AR_TRANSLATOR is None:
        _EN_AR_TRANSLATOR = pipeline(
            task='translation_en_to_ar',
            model='Helsinki-NLP/opus-mt-en-ar',
        )
    return _EN_AR_TRANSLATOR


def translate_to_arabic(text):
    """Translate English text to Arabic.

    Args:
        text: The English text to translate. ``None``, an empty string, or a
            whitespace-only string is treated as "nothing to translate".

    Returns:
        The ``'translation_text'`` of the first pipeline result, or an empty
        string when there is nothing to translate.
    """
    # Avoid running the model on blank input.
    if not text or not text.strip():
        return ""
    # max_length=100 is kept from the original call to the translator.
    result = _get_translator()(text, max_length=100)
    return result[0]['translation_text']
19
+
20
# Lazily created text-to-speech pipeline and speaker embedding. Both are loaded
# once and reused, instead of re-loading the model and the embedding dataset on
# every request.
_TTS_PIPELINE = None
_SPEAKER_EMBEDDING = None


def _get_tts_pipeline():
    """Return the shared text-to-speech pipeline, building it on first use."""
    global _TTS_PIPELINE
    if _TTS_PIPELINE is None:
        _TTS_PIPELINE = pipeline(
            task='text-to-speech',
            model='MBZUAI/speecht5_tts_clartts_ar',
        )
    return _TTS_PIPELINE


def _get_speaker_embedding():
    """Return the shared speaker-embedding tensor, loading the dataset on first use."""
    global _SPEAKER_EMBEDDING
    if _SPEAKER_EMBEDDING is None:
        embedding_dataset = load_dataset(
            'herwoww/arabic_xvector_embeddings',
            split='validation',
        )
        # Entry 100 is the speaker chosen by the original code; unsqueeze(0)
        # adds a leading dimension (the original comment notes the result is a
        # 2-D tensor).
        _SPEAKER_EMBEDDING = torch.tensor(
            embedding_dataset[100]['speaker_embeddings']
        ).unsqueeze(0)
    return _SPEAKER_EMBEDDING


def text_to_speech(text):
    """Synthesize speech for the given text.

    Args:
        text: The text to speak. ``None``, an empty string, or a
            whitespace-only string is treated as "nothing to synthesize".

    Returns:
        A ``(sampling_rate, audio)`` tuple where ``audio`` is a float32 NumPy
        array built from the pipeline output, or ``None`` when there is
        nothing to synthesize.
    """
    # Avoid loading/running the model on blank input.
    if not text or not text.strip():
        return None
    speech = _get_tts_pipeline()(
        text,
        forward_params={'speaker_embeddings': _get_speaker_embedding()},
    )
    return (speech['sampling_rate'], np.array(speech['audio'], dtype=np.float32))
27
+
28
# Gradio front end: three chained steps (classify image -> translate the label
# to Arabic -> speak the translation), each triggered by its own button.
with gr.Blocks() as demo:
    gr.Markdown("## Image classification, Arabic Translation and text to speech interface")

    # Step 1: upload an image and show the predicted label.
    with gr.Row():
        with gr.Column():
            image_input = gr.Image(type="pil", label="upload_image")
            predict_button = gr.Button("predict")
            prediction_box = gr.Textbox(label="Prediction Result")
            predict_button.click(fn=predict_image, inputs=image_input, outputs=prediction_box)

    # Step 2: translate whatever is in the prediction box.
    with gr.Row():
        with gr.Column():
            translated_text = gr.Textbox(label="Translated Text")
            translated_button = gr.Button("Translate To Arabic")
            translated_button.click(fn=translate_to_arabic, inputs=prediction_box, outputs=translated_text)

    # Step 3: turn the translated text into audio.
    with gr.Row():
        to_speech_button = gr.Button("convert To Audio")
        audio_output = gr.Audio(label="Audio Output")
        to_speech_button.click(fn=text_to_speech, inputs=translated_text, outputs=audio_output)

# Start the app with the same launch arguments as before.
demo.launch(share=True)