Spaces:

wzy013
/

hunyuanvideo-foley

Running

wzy013 committed on Sep 2

Commit

b3e5ac7

1 Parent(s): 4abc485

Improve API calling robustness and error handling

- Add API endpoint discovery for Gradio Client
- Try multiple possible API endpoints automatically
- Improve HF Inference API error handling and responses
- Add proper token checking and validation
- Better logging and debugging information
- Handle different API response formats properly

Files changed (1) hide show

app.py +92 -28

app.py CHANGED Viewed

@@ -18,26 +18,63 @@ def call_gradio_client_api(video_file, text_prompt, guidance_scale, inference_st
         # 连接到官方Space
         client = Client("tencent/HunyuanVideo-Foley")
         logger.info("发送推理请求...")
-        # 调用推理函数
-        result = client.predict(
-            video_file,  # 视频文件
-            text_prompt,  # 文本提示
-            guidance_scale,  # CFG scale
-            inference_steps,  # 推理步数
-            sample_nums,  # 样本数量
-            api_name="/infer_single_video"  # API端点名称
-        )
-        return result, "✅ 成功通过官方API生成音频!"
     except Exception as e:
         error_msg = str(e)
         logger.error(f"Gradio Client API 调用失败: {error_msg}")
         if "not found" in error_msg.lower():
-            return None, "❌ 官方Space的API端点未找到，可能接口已更改"
         elif "connection" in error_msg.lower():
             return None, "❌ 无法连接到官方Space，请检查网络"
         elif "queue" in error_msg.lower():
@@ -50,39 +87,66 @@ def call_huggingface_inference_api(video_file, text_prompt):
     try:
         logger.info("尝试Hugging Face Inference API...")
-        API_URL = "https://api-inference.huggingface.co/models/tencent/HunyuanVideo-Foley"
-        # 读取视频文件
-        with open(video_file, "rb") as f:
-            video_data = f.read()
-        # 准备请求数据
         headers = {
-            "Authorization": f"Bearer {os.environ.get('HF_TOKEN', '')}",
         }
         # 发送请求
         response = requests.post(
             API_URL,
             headers=headers,
-            json={"inputs": {"video": base64.b64encode(video_data).decode(), "text": text_prompt}},
-            timeout=300
         )
         if response.status_code == 200:
-            # 保存结果
-            temp_dir = tempfile.mkdtemp()
-            audio_path = os.path.join(temp_dir, "generated_audio.wav")
-            with open(audio_path, 'wb') as f:
-                f.write(response.content)
-            return [audio_path], "✅ 通过Hugging Face API生成成功!"
         else:
-            logger.error(f"HF API错误: {response.status_code}")
-            return None, f"❌ Hugging Face API返回错误: {response.status_code}"
     except Exception as e:
         logger.error(f"HF API调用失败: {str(e)}")
-        return None, f"❌ Hugging Face API调用失败: {str(e)}"
 def try_alternative_apis(video_file, text_prompt):
     """尝试其他可能的API服务"""

         # 连接到官方Space
         client = Client("tencent/HunyuanVideo-Foley")
+        # 首先检查Space的API端点
+        logger.info("检查可用的API端点...")
+        try:
+            # 获取Space的API信息
+            api_info = client.view_api()
+            logger.info(f"可用的API端点: {api_info}")
+        except:
+            logger.warning("无法获取API端点信息")
         logger.info("发送推理请求...")
+        # 尝试不同的API端点名称
+        possible_endpoints = [
+            "/infer_single_video",
+            "/predict",
+            "/generate",
+            None  # 使用默认端点
+        ]
+        for endpoint in possible_endpoints:
+            try:
+                logger.info(f"尝试端点: {endpoint}")
+                if endpoint:
+                    result = client.predict(
+                        video_file,
+                        text_prompt,
+                        guidance_scale,
+                        inference_steps,
+                        sample_nums,
+                        api_name=endpoint
+                    )
+                else:
+                    # 尝试默认调用
+                    result = client.predict(
+                        video_file,
+                        text_prompt,
+                        guidance_scale,
+                        inference_steps,
+                        sample_nums
+                    )
+                logger.info("API调用成功!")
+                return result, "✅ 成功通过官方API生成音频!"
+            except Exception as endpoint_error:
+                logger.warning(f"端点 {endpoint} 失败: {str(endpoint_error)}")
+                continue
+        return None, "❌ 所有API端点都调用失败"
     except Exception as e:
         error_msg = str(e)
         logger.error(f"Gradio Client API 调用失败: {error_msg}")
         if "not found" in error_msg.lower():
+            return None, "❌ 官方Space未找到或不可访问"
         elif "connection" in error_msg.lower():
             return None, "❌ 无法连接到官方Space，请检查网络"
         elif "queue" in error_msg.lower():
     try:
         logger.info("尝试Hugging Face Inference API...")
+        # 检查是否有Token
+        hf_token = os.environ.get('HF_TOKEN') or os.environ.get('HUGGING_FACE_HUB_TOKEN')
+        if not hf_token:
+            return None, "❌ 未配置HF_TOKEN，跳过Inference API"
+        API_URL = "https://api-inference.huggingface.co/models/tencent/HunyuanVideo-Foley"
+        # 准备请求数据 - 简化格式
         headers = {
+            "Authorization": f"Bearer {hf_token}",
+            "Content-Type": "application/json"
         }
+        # 简化的请求数据
+        data = {
+            "inputs": text_prompt,  # 简化输入格式
+            "parameters": {
+                "guidance_scale": 4.5,
+                "num_inference_steps": 50
+            }
+        }
+        logger.info("发送Inference API请求...")
         # 发送请求
         response = requests.post(
             API_URL,
             headers=headers,
+            json=data,
+            timeout=60  # 缩短超时时间
         )
+        logger.info(f"API响应状态码: {response.status_code}")
         if response.status_code == 200:
+            # 检查响应内容类型
+            content_type = response.headers.get('content-type', '')
+            if 'audio' in content_type:
+                # 保存音频结果
+                temp_dir = tempfile.mkdtemp()
+                audio_path = os.path.join(temp_dir, "generated_audio.wav")
+                with open(audio_path, 'wb') as f:
+                    f.write(response.content)
+                return [audio_path], "✅ 通过Hugging Face API生成成功!"
+            else:
+                logger.warning(f"响应不是音频格式: {content_type}")
+                return None, f"❌ API返回了非音频内容: {content_type}"
+        elif response.status_code == 503:
+            return None, "⏳ 模型正在加载中，请稍后重试"
+        elif response.status_code == 401:
+            return None, "❌ HF Token无效或权限不足"
+        elif response.status_code == 404:
+            return None, "❌ 该模型不支持Inference API"
         else:
+            logger.error(f"HF API错误: {response.status_code} - {response.text}")
+            return None, f"❌ HF API错误 {response.status_code}: {response.text[:100]}"
     except Exception as e:
         logger.error(f"HF API调用失败: {str(e)}")
+        return None, f"❌ HF API调用失败: {str(e)}"
 def try_alternative_apis(video_file, text_prompt):
     """尝试其他可能的API服务"""