Spaces:
Sleeping
Sleeping
Commit
·
f7af463
1
Parent(s):
fc6bf99
Improve audio transcription with temp file approach and enhanced logging
Browse files
- Use temporary file approach for more reliable audio handling
- Add sample_rate=16000 parameter (optimal for speech recognition)
- Enhanced API error logging with response status and body
- Add file size validation warnings
- Proper cleanup of temporary files
- More detailed error tracking for Groq API calls
This addresses potential Streamlit UploadedFile handling issues.
app.py
CHANGED
|
@@ -12,6 +12,7 @@ import logging
|
|
| 12 |
import json
|
| 13 |
import re
|
| 14 |
import copy
|
|
|
|
| 15 |
from datetime import datetime
|
| 16 |
from typing import Optional, Dict, Any, List
|
| 17 |
from collections import deque
|
|
@@ -93,6 +94,7 @@ class ToolRegistry:
|
|
| 93 |
@staticmethod
|
| 94 |
def transcribe_audio(audio_file) -> Optional[str]:
|
| 95 |
"""Transcribe audio using Whisper Large V3 Turbo"""
|
|
|
|
| 96 |
try:
|
| 97 |
logger.info("=== Starting audio transcription ===")
|
| 98 |
|
|
@@ -110,13 +112,13 @@ class ToolRegistry:
|
|
| 110 |
filename = getattr(audio_file, 'name', 'audio.wav')
|
| 111 |
logger.info(f"Original filename: {filename}")
|
| 112 |
|
| 113 |
-
#
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
|
|
|
| 117 |
|
| 118 |
-
# Read the file contents
|
| 119 |
-
# Format: (filename, file_contents)
|
| 120 |
file_contents = audio_file.read()
|
| 121 |
file_size = len(file_contents)
|
| 122 |
logger.info(f"Read {file_size} bytes from audio file")
|
|
@@ -125,14 +127,38 @@ class ToolRegistry:
|
|
| 125 |
logger.error("Audio file is empty (0 bytes)")
|
| 126 |
return None
|
| 127 |
|
| 128 |
-
|
|
|
|
|
|
|
| 129 |
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
|
| 137 |
result = str(transcription)
|
| 138 |
logger.info(f"Transcription successful. Length: {len(result)} characters")
|
|
@@ -145,6 +171,14 @@ class ToolRegistry:
|
|
| 145 |
logger.error(f"Error message: {str(e)}")
|
| 146 |
logger.error(f"Full error details:", exc_info=True)
|
| 147 |
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 148 |
|
| 149 |
@staticmethod
|
| 150 |
def get_weather(city: str) -> Optional[Dict[str, Any]]:
|
|
@@ -813,8 +847,8 @@ if True:
|
|
| 813 |
# Chat input and audio upload
|
| 814 |
st.markdown("---")
|
| 815 |
|
| 816 |
-
# Audio recording for speech-to-text
|
| 817 |
-
audio_input = st.audio_input("🎤 Click to speak")
|
| 818 |
if audio_input:
|
| 819 |
with st.spinner("🎧 Transcribing your voice..."):
|
| 820 |
try:
|
|
@@ -911,8 +945,8 @@ if True:
|
|
| 911 |
# Continue conversation input
|
| 912 |
st.markdown("---")
|
| 913 |
|
| 914 |
-
# Audio recording for follow-up
|
| 915 |
-
audio_input = st.audio_input("🎤 Click to speak", key="followup_audio")
|
| 916 |
if audio_input and not st.session_state.processing:
|
| 917 |
with st.spinner("🎧 Transcribing your voice..."):
|
| 918 |
try:
|
|
|
|
| 12 |
import json
|
| 13 |
import re
|
| 14 |
import copy
|
| 15 |
+
import tempfile
|
| 16 |
from datetime import datetime
|
| 17 |
from typing import Optional, Dict, Any, List
|
| 18 |
from collections import deque
|
|
|
|
| 94 |
@staticmethod
|
| 95 |
def transcribe_audio(audio_file) -> Optional[str]:
|
| 96 |
"""Transcribe audio using Whisper Large V3 Turbo"""
|
| 97 |
+
temp_file_path = None
|
| 98 |
try:
|
| 99 |
logger.info("=== Starting audio transcription ===")
|
| 100 |
|
|
|
|
| 112 |
filename = getattr(audio_file, 'name', 'audio.wav')
|
| 113 |
logger.info(f"Original filename: {filename}")
|
| 114 |
|
| 115 |
+
# Determine file extension
|
| 116 |
+
file_ext = '.wav'
|
| 117 |
+
if any(filename.lower().endswith(ext) for ext in ['.wav', '.mp3', '.webm', '.m4a', '.ogg']):
|
| 118 |
+
file_ext = os.path.splitext(filename)[1]
|
| 119 |
+
logger.info(f"Using file extension: {file_ext}")
|
| 120 |
|
| 121 |
+
# Read the file contents
|
|
|
|
| 122 |
file_contents = audio_file.read()
|
| 123 |
file_size = len(file_contents)
|
| 124 |
logger.info(f"Read {file_size} bytes from audio file")
|
|
|
|
| 127 |
logger.error("Audio file is empty (0 bytes)")
|
| 128 |
return None
|
| 129 |
|
| 130 |
+
# Check if file is too small
|
| 131 |
+
if file_size < 1000:
|
| 132 |
+
logger.warning(f"Audio file very small: {file_size} bytes. May be too short.")
|
| 133 |
|
| 134 |
+
# Save to temporary file (more reliable approach)
|
| 135 |
+
with tempfile.NamedTemporaryFile(mode='wb', suffix=file_ext, delete=False) as temp_file:
|
| 136 |
+
temp_file.write(file_contents)
|
| 137 |
+
temp_file_path = temp_file.name
|
| 138 |
+
logger.info(f"Saved audio to temporary file: {temp_file_path}")
|
| 139 |
+
|
| 140 |
+
# Open the temporary file and send to Groq API
|
| 141 |
+
logger.info(f"Sending to Groq API - Model: whisper-large-v3-turbo, Size: {file_size} bytes")
|
| 142 |
+
|
| 143 |
+
with open(temp_file_path, 'rb') as audio_file_handle:
|
| 144 |
+
try:
|
| 145 |
+
transcription = groq_client.audio.transcriptions.create(
|
| 146 |
+
file=(os.path.basename(temp_file_path), audio_file_handle.read()),
|
| 147 |
+
model="whisper-large-v3-turbo",
|
| 148 |
+
response_format="text",
|
| 149 |
+
temperature=0.0
|
| 150 |
+
)
|
| 151 |
+
logger.info("API call completed successfully")
|
| 152 |
+
except Exception as api_error:
|
| 153 |
+
logger.error(f"Groq API call failed: {type(api_error).__name__}")
|
| 154 |
+
logger.error(f"API error details: {str(api_error)}")
|
| 155 |
+
|
| 156 |
+
# Try to extract more details if it's a Groq API error
|
| 157 |
+
if hasattr(api_error, 'response'):
|
| 158 |
+
logger.error(f"Response status: {getattr(api_error.response, 'status_code', 'N/A')}")
|
| 159 |
+
logger.error(f"Response body: {getattr(api_error.response, 'text', 'N/A')}")
|
| 160 |
+
|
| 161 |
+
raise # Re-raise to be caught by outer exception handler
|
| 162 |
|
| 163 |
result = str(transcription)
|
| 164 |
logger.info(f"Transcription successful. Length: {len(result)} characters")
|
|
|
|
| 171 |
logger.error(f"Error message: {str(e)}")
|
| 172 |
logger.error(f"Full error details:", exc_info=True)
|
| 173 |
return None
|
| 174 |
+
finally:
|
| 175 |
+
# Clean up temporary file
|
| 176 |
+
if temp_file_path and os.path.exists(temp_file_path):
|
| 177 |
+
try:
|
| 178 |
+
os.unlink(temp_file_path)
|
| 179 |
+
logger.info(f"Cleaned up temporary file: {temp_file_path}")
|
| 180 |
+
except Exception as cleanup_error:
|
| 181 |
+
logger.warning(f"Failed to cleanup temp file: {cleanup_error}")
|
| 182 |
|
| 183 |
@staticmethod
|
| 184 |
def get_weather(city: str) -> Optional[Dict[str, Any]]:
|
|
|
|
| 847 |
# Chat input and audio upload
|
| 848 |
st.markdown("---")
|
| 849 |
|
| 850 |
+
# Audio recording for speech-to-text (16kHz is optimal for speech recognition)
|
| 851 |
+
audio_input = st.audio_input("🎤 Click to speak", sample_rate=16000)
|
| 852 |
if audio_input:
|
| 853 |
with st.spinner("🎧 Transcribing your voice..."):
|
| 854 |
try:
|
|
|
|
| 945 |
# Continue conversation input
|
| 946 |
st.markdown("---")
|
| 947 |
|
| 948 |
+
# Audio recording for follow-up (16kHz is optimal for speech recognition)
|
| 949 |
+
audio_input = st.audio_input("🎤 Click to speak", key="followup_audio", sample_rate=16000)
|
| 950 |
if audio_input and not st.session_state.processing:
|
| 951 |
with st.spinner("🎧 Transcribing your voice..."):
|
| 952 |
try:
|