Spaces:

yashgori20
/

evolusis

Sleeping

yashgori20 committed on Nov 10

Commit

f7af463

1 Parent(s): fc6bf99

Improve audio transcription with temp file approach and enhanced logging

- Use temporary file approach for more reliable audio handling
- Add sample_rate=16000 parameter (optimal for speech recognition)
- Enhanced API error logging with response status and body
- Add file size validation warnings
- Proper cleanup of temporary files
- More detailed error tracking for Groq API calls

This addresses potential Streamlit UploadedFile handling issues.

Files changed (1) hide show

app.py +51 -17

app.py CHANGED Viewed

@@ -12,6 +12,7 @@ import logging
 import json
 import re
 import copy
 from datetime import datetime
 from typing import Optional, Dict, Any, List
 from collections import deque
@@ -93,6 +94,7 @@ class ToolRegistry:
     @staticmethod
     def transcribe_audio(audio_file) -> Optional[str]:
         """Transcribe audio using Whisper Large V3 Turbo"""
         try:
             logger.info("=== Starting audio transcription ===")
@@ -110,13 +112,13 @@ class ToolRegistry:
             filename = getattr(audio_file, 'name', 'audio.wav')
             logger.info(f"Original filename: {filename}")
-            # Ensure filename has an extension
-            if not any(filename.lower().endswith(ext) for ext in ['.wav', '.mp3', '.webm', '.m4a', '.ogg']):
-                filename = 'audio.wav'
-                logger.info(f"Updated filename to: {filename}")
-            # Read the file contents and create tuple as per Groq API documentation
-            # Format: (filename, file_contents)
             file_contents = audio_file.read()
             file_size = len(file_contents)
             logger.info(f"Read {file_size} bytes from audio file")
@@ -125,14 +127,38 @@ class ToolRegistry:
                 logger.error("Audio file is empty (0 bytes)")
                 return None
-            logger.info(f"Sending to Groq API - Model: whisper-large-v3-turbo, File: {filename}, Size: {file_size} bytes")
-            transcription = groq_client.audio.transcriptions.create(
-                file=(filename, file_contents),
-                model="whisper-large-v3-turbo",
-                response_format="text",
-                temperature=0.0
-            )
             result = str(transcription)
             logger.info(f"Transcription successful. Length: {len(result)} characters")
@@ -145,6 +171,14 @@ class ToolRegistry:
             logger.error(f"Error message: {str(e)}")
             logger.error(f"Full error details:", exc_info=True)
             return None
     @staticmethod
     def get_weather(city: str) -> Optional[Dict[str, Any]]:
@@ -813,8 +847,8 @@ if True:
         # Chat input and audio upload
         st.markdown("---")
-        # Audio recording for speech-to-text
-        audio_input = st.audio_input("🎤 Click to speak")
         if audio_input:
             with st.spinner("🎧 Transcribing your voice..."):
                 try:
@@ -911,8 +945,8 @@ if True:
         # Continue conversation input
         st.markdown("---")
-        # Audio recording for follow-up
-        audio_input = st.audio_input("🎤 Click to speak", key="followup_audio")
         if audio_input and not st.session_state.processing:
             with st.spinner("🎧 Transcribing your voice..."):
                 try:

 import json
 import re
 import copy
+import tempfile
 from datetime import datetime
 from typing import Optional, Dict, Any, List
 from collections import deque
     @staticmethod
     def transcribe_audio(audio_file) -> Optional[str]:
         """Transcribe audio using Whisper Large V3 Turbo"""
+        temp_file_path = None
         try:
             logger.info("=== Starting audio transcription ===")
             filename = getattr(audio_file, 'name', 'audio.wav')
             logger.info(f"Original filename: {filename}")
+            # Determine file extension
+            file_ext = '.wav'
+            if any(filename.lower().endswith(ext) for ext in ['.wav', '.mp3', '.webm', '.m4a', '.ogg']):
+                file_ext = os.path.splitext(filename)[1]
+            logger.info(f"Using file extension: {file_ext}")
+            # Read the file contents
             file_contents = audio_file.read()
             file_size = len(file_contents)
             logger.info(f"Read {file_size} bytes from audio file")
                 logger.error("Audio file is empty (0 bytes)")
                 return None
+            # Check if file is too small
+            if file_size < 1000:
+                logger.warning(f"Audio file very small: {file_size} bytes. May be too short.")
+            # Save to temporary file (more reliable approach)
+            with tempfile.NamedTemporaryFile(mode='wb', suffix=file_ext, delete=False) as temp_file:
+                temp_file.write(file_contents)
+                temp_file_path = temp_file.name
+                logger.info(f"Saved audio to temporary file: {temp_file_path}")
+            # Open the temporary file and send to Groq API
+            logger.info(f"Sending to Groq API - Model: whisper-large-v3-turbo, Size: {file_size} bytes")
+            with open(temp_file_path, 'rb') as audio_file_handle:
+                try:
+                    transcription = groq_client.audio.transcriptions.create(
+                        file=(os.path.basename(temp_file_path), audio_file_handle.read()),
+                        model="whisper-large-v3-turbo",
+                        response_format="text",
+                        temperature=0.0
+                    )
+                    logger.info("API call completed successfully")
+                except Exception as api_error:
+                    logger.error(f"Groq API call failed: {type(api_error).__name__}")
+                    logger.error(f"API error details: {str(api_error)}")
+                    # Try to extract more details if it's a Groq API error
+                    if hasattr(api_error, 'response'):
+                        logger.error(f"Response status: {getattr(api_error.response, 'status_code', 'N/A')}")
+                        logger.error(f"Response body: {getattr(api_error.response, 'text', 'N/A')}")
+                    raise  # Re-raise to be caught by outer exception handler
             result = str(transcription)
             logger.info(f"Transcription successful. Length: {len(result)} characters")
             logger.error(f"Error message: {str(e)}")
             logger.error(f"Full error details:", exc_info=True)
             return None
+        finally:
+            # Clean up temporary file
+            if temp_file_path and os.path.exists(temp_file_path):
+                try:
+                    os.unlink(temp_file_path)
+                    logger.info(f"Cleaned up temporary file: {temp_file_path}")
+                except Exception as cleanup_error:
+                    logger.warning(f"Failed to cleanup temp file: {cleanup_error}")
     @staticmethod
     def get_weather(city: str) -> Optional[Dict[str, Any]]:
         # Chat input and audio upload
         st.markdown("---")
+        # Audio recording for speech-to-text (16kHz is optimal for speech recognition)
+        audio_input = st.audio_input("🎤 Click to speak", sample_rate=16000)
         if audio_input:
             with st.spinner("🎧 Transcribing your voice..."):
                 try:
         # Continue conversation input
         st.markdown("---")
+        # Audio recording for follow-up (16kHz is optimal for speech recognition)
+        audio_input = st.audio_input("🎤 Click to speak", key="followup_audio", sample_rate=16000)
         if audio_input and not st.session_state.processing:
             with st.spinner("🎧 Transcribing your voice..."):
                 try: