yashgori20 committed on
Commit
f7af463
·
1 Parent(s): fc6bf99

Improve audio transcription with temp file approach and enhanced logging

Browse files

- Use temporary file approach for more reliable audio handling
- Add sample_rate=16000 parameter (optimal for speech recognition)
- Enhanced API error logging with response status and body
- Add file size validation warnings
- Proper cleanup of temporary files
- More detailed error tracking for Groq API calls

This addresses potential Streamlit UploadedFile handling issues.

Files changed (1) hide show
  1. app.py +51 -17
app.py CHANGED
@@ -12,6 +12,7 @@ import logging
12
  import json
13
  import re
14
  import copy
 
15
  from datetime import datetime
16
  from typing import Optional, Dict, Any, List
17
  from collections import deque
@@ -93,6 +94,7 @@ class ToolRegistry:
93
  @staticmethod
94
  def transcribe_audio(audio_file) -> Optional[str]:
95
  """Transcribe audio using Whisper Large V3 Turbo"""
 
96
  try:
97
  logger.info("=== Starting audio transcription ===")
98
 
@@ -110,13 +112,13 @@ class ToolRegistry:
110
  filename = getattr(audio_file, 'name', 'audio.wav')
111
  logger.info(f"Original filename: {filename}")
112
 
113
- # Ensure filename has an extension
114
- if not any(filename.lower().endswith(ext) for ext in ['.wav', '.mp3', '.webm', '.m4a', '.ogg']):
115
- filename = 'audio.wav'
116
- logger.info(f"Updated filename to: {filename}")
 
117
 
118
- # Read the file contents and create tuple as per Groq API documentation
119
- # Format: (filename, file_contents)
120
  file_contents = audio_file.read()
121
  file_size = len(file_contents)
122
  logger.info(f"Read {file_size} bytes from audio file")
@@ -125,14 +127,38 @@ class ToolRegistry:
125
  logger.error("Audio file is empty (0 bytes)")
126
  return None
127
 
128
- logger.info(f"Sending to Groq API - Model: whisper-large-v3-turbo, File: {filename}, Size: {file_size} bytes")
 
 
129
 
130
- transcription = groq_client.audio.transcriptions.create(
131
- file=(filename, file_contents),
132
- model="whisper-large-v3-turbo",
133
- response_format="text",
134
- temperature=0.0
135
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
 
137
  result = str(transcription)
138
  logger.info(f"Transcription successful. Length: {len(result)} characters")
@@ -145,6 +171,14 @@ class ToolRegistry:
145
  logger.error(f"Error message: {str(e)}")
146
  logger.error(f"Full error details:", exc_info=True)
147
  return None
 
 
 
 
 
 
 
 
148
 
149
  @staticmethod
150
  def get_weather(city: str) -> Optional[Dict[str, Any]]:
@@ -813,8 +847,8 @@ if True:
813
  # Chat input and audio upload
814
  st.markdown("---")
815
 
816
- # Audio recording for speech-to-text
817
- audio_input = st.audio_input("🎀 Click to speak")
818
  if audio_input:
819
  with st.spinner("🎧 Transcribing your voice..."):
820
  try:
@@ -911,8 +945,8 @@ if True:
911
  # Continue conversation input
912
  st.markdown("---")
913
 
914
- # Audio recording for follow-up
915
- audio_input = st.audio_input("🎀 Click to speak", key="followup_audio")
916
  if audio_input and not st.session_state.processing:
917
  with st.spinner("🎧 Transcribing your voice..."):
918
  try:
 
12
  import json
13
  import re
14
  import copy
15
+ import tempfile
16
  from datetime import datetime
17
  from typing import Optional, Dict, Any, List
18
  from collections import deque
 
94
  @staticmethod
95
  def transcribe_audio(audio_file) -> Optional[str]:
96
  """Transcribe audio using Whisper Large V3 Turbo"""
97
+ temp_file_path = None
98
  try:
99
  logger.info("=== Starting audio transcription ===")
100
 
 
112
  filename = getattr(audio_file, 'name', 'audio.wav')
113
  logger.info(f"Original filename: {filename}")
114
 
115
+ # Determine file extension
116
+ file_ext = '.wav'
117
+ if any(filename.lower().endswith(ext) for ext in ['.wav', '.mp3', '.webm', '.m4a', '.ogg']):
118
+ file_ext = os.path.splitext(filename)[1]
119
+ logger.info(f"Using file extension: {file_ext}")
120
 
121
+ # Read the file contents
 
122
  file_contents = audio_file.read()
123
  file_size = len(file_contents)
124
  logger.info(f"Read {file_size} bytes from audio file")
 
127
  logger.error("Audio file is empty (0 bytes)")
128
  return None
129
 
130
+ # Check if file is too small
131
+ if file_size < 1000:
132
+ logger.warning(f"Audio file very small: {file_size} bytes. May be too short.")
133
 
134
+ # Save to temporary file (more reliable approach)
135
+ with tempfile.NamedTemporaryFile(mode='wb', suffix=file_ext, delete=False) as temp_file:
136
+ temp_file.write(file_contents)
137
+ temp_file_path = temp_file.name
138
+ logger.info(f"Saved audio to temporary file: {temp_file_path}")
139
+
140
+ # Open the temporary file and send to Groq API
141
+ logger.info(f"Sending to Groq API - Model: whisper-large-v3-turbo, Size: {file_size} bytes")
142
+
143
+ with open(temp_file_path, 'rb') as audio_file_handle:
144
+ try:
145
+ transcription = groq_client.audio.transcriptions.create(
146
+ file=(os.path.basename(temp_file_path), audio_file_handle.read()),
147
+ model="whisper-large-v3-turbo",
148
+ response_format="text",
149
+ temperature=0.0
150
+ )
151
+ logger.info("API call completed successfully")
152
+ except Exception as api_error:
153
+ logger.error(f"Groq API call failed: {type(api_error).__name__}")
154
+ logger.error(f"API error details: {str(api_error)}")
155
+
156
+ # Try to extract more details if it's a Groq API error
157
+ if hasattr(api_error, 'response'):
158
+ logger.error(f"Response status: {getattr(api_error.response, 'status_code', 'N/A')}")
159
+ logger.error(f"Response body: {getattr(api_error.response, 'text', 'N/A')}")
160
+
161
+ raise # Re-raise to be caught by outer exception handler
162
 
163
  result = str(transcription)
164
  logger.info(f"Transcription successful. Length: {len(result)} characters")
 
171
  logger.error(f"Error message: {str(e)}")
172
  logger.error(f"Full error details:", exc_info=True)
173
  return None
174
+ finally:
175
+ # Clean up temporary file
176
+ if temp_file_path and os.path.exists(temp_file_path):
177
+ try:
178
+ os.unlink(temp_file_path)
179
+ logger.info(f"Cleaned up temporary file: {temp_file_path}")
180
+ except Exception as cleanup_error:
181
+ logger.warning(f"Failed to cleanup temp file: {cleanup_error}")
182
 
183
  @staticmethod
184
  def get_weather(city: str) -> Optional[Dict[str, Any]]:
 
847
  # Chat input and audio upload
848
  st.markdown("---")
849
 
850
+ # Audio recording for speech-to-text (16kHz is optimal for speech recognition)
851
+ audio_input = st.audio_input("🎀 Click to speak", sample_rate=16000)
852
  if audio_input:
853
  with st.spinner("🎧 Transcribing your voice..."):
854
  try:
 
945
  # Continue conversation input
946
  st.markdown("---")
947
 
948
+ # Audio recording for follow-up (16kHz is optimal for speech recognition)
949
+ audio_input = st.audio_input("🎀 Click to speak", key="followup_audio", sample_rate=16000)
950
  if audio_input and not st.session_state.processing:
951
  with st.spinner("🎧 Transcribing your voice..."):
952
  try: