interviewer

Sleeping

App Files Files Community

IliaLarchenko committed on Apr 23, 2024

Commit

3447ff0

1 Parent(s): 3a90dc2

Added streaming to audio output

Browse files

Files changed (2) hide show

api/audio.py +37 -22
app.py +2 -4

api/audio.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import io
 import wave
 import requests
@@ -59,27 +60,41 @@ class TTSManager:
     def __init__(self, config):
         self.config = config
-    def text_to_speech(self, text):
-        try:
-            if self.config.tts.type == "OPENAI_API":
-                client = OpenAI(base_url=self.config.tts.url, api_key=self.config.tts.key)
-                response = client.audio.speech.create(model=self.config.tts.name, voice="alloy", response_format="opus", input=text)
-            elif self.config.tts.type == "HF_API":
-                headers = {"Authorization": "Bearer " + self.config.tts.key}
-                response = requests.post(self.config.tts.url, headers=headers, json={"inputs": text})
-                if response.status_code != 200:
-                    error_details = response.json().get("error", "No error message provided")
-                    raise APIError("TTS Error: HF API error", status_code=response.status_code, details=error_details)
-        except APIError as e:
-            raise
-        except Exception as e:
-            raise APIError(f"TTS Error: Unexpected error: {e}")
-        return response.content
     def read_last_message(self, chat_display):
         if chat_display:
-            last_message = chat_display[-1][1]
-            if last_message is not None:
-                return self.text_to_speech(last_message)
-        return None

 import io
+import os
 import wave
 import requests
     def __init__(self, config):
         self.config = config
     def read_last_message(self, chat_display):
         if chat_display:
+            text = chat_display[-1][1]
+            headers = {"Authorization": "Bearer " + self.config.tts.key}
+            try:
+                if self.config.tts.type == "OPENAI_API":
+                    data = {"model": self.config.tts.name, "input": text, "voice": "alloy", "response_format": "opus"}
+                    if os.environ.get("STREAMING", False):
+                        with requests.post(self.config.tts.url, headers=headers, json=data, stream=True) as response:
+                            if response.status_code != 200:
+                                error_details = response.json().get("error", "No error message provided")
+                                raise APIError("TTS Error: OPENAI API error", status_code=response.status_code, details=error_details)
+                            else:
+                                yield from response.iter_content(chunk_size=1024)
+                    else:
+                        response = requests.post(self.config.tts.url, headers=headers, json=data)
+                        if response.status_code != 200:
+                            error_details = response.json().get("error", "No error message provided")
+                            raise APIError("TTS Error: OPENAI API error", status_code=response.status_code, details=error_details)
+                        return response.content
+                elif self.config.tts.type == "HF_API":
+                    if os.environ.get("STREAMING", False):
+                        raise APIError("Streaming not supported for HF API TTS")
+                    else:
+                        response = requests.post(self.config.tts.url, headers=headers, json={"inputs": text})
+                        if response.status_code != 200:
+                            error_details = response.json().get("error", "No error message provided")
+                            raise APIError("TTS Error: HF API error", status_code=response.status_code, details=error_details)
+                        return response.content
+            except APIError as e:
+                raise
+            except Exception as e:
+                raise APIError(f"TTS Error: Unexpected error: {e}")
+        else:
+            return None

app.py CHANGED Viewed

@@ -53,7 +53,7 @@ with gr.Blocks(title="AI Interviewer") as demo:
         gr.Markdown(instruction["demo"])
     started_coding = gr.State(False)
-    audio_output = gr.Audio(label="Play audio", autoplay=True, visible=False, interactive=False)
     with gr.Tab("Instruction") as instruction_tab:
         with gr.Row():
             with gr.Column(scale=2):
@@ -167,8 +167,6 @@ with gr.Blocks(title="AI Interviewer") as demo:
         outputs=[chat_history, chat, message, previous_code],
     )
-    chat.change(fn=tts.read_last_message, inputs=[chat], outputs=[audio_output], trigger_mode="once")
-    # audio_output.stop(fn=lambda: None, inputs=None, outputs=[audio_output])
 demo.launch(show_api=False)

         gr.Markdown(instruction["demo"])
     started_coding = gr.State(False)
+    audio_output = gr.Audio(label="Play audio", autoplay=True, visible=False, streaming=os.environ.get("STREAMING", False))
     with gr.Tab("Instruction") as instruction_tab:
         with gr.Row():
             with gr.Column(scale=2):
         outputs=[chat_history, chat, message, previous_code],
     )
+    chat.change(fn=tts.read_last_message, inputs=[chat], outputs=[audio_output])
 demo.launch(show_api=False)