Update app.py

app.py CHANGED
@@ -1,7 +1,16 @@
 # app.py — MCP server using DeepSeek via Hugging Face transformers (or fallback)
 # - Put this file next to config.py (see example below)
-# -
-#
+# - Supports LOCAL_MODEL values like:
+#     "deepseek/deepseek-r1-0528" or "deepseek/deepseek-r1-0528:free"
+#   If a ":revision" suffix is present, it will be passed as the `revision=` argument
+#   to transformers.from_pretrained so HF validation is satisfied.
+#
+# - It loads the model via transformers.pipeline if available; otherwise falls back
+#   to google/flan-t5-small or rule-based responder.
+#
+# - Developer instruction: when a user pastes a local path (e.g. /mnt/data/...), the
+#   chat handler passes the path unchanged to process_document(); tool invocation
+#   normalization converts file_path -> file_url (file://...) and optionally file_b64.
 
 from mcp.server.fastmcp import FastMCP
 from typing import Optional, List, Tuple, Any, Dict
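The "developer instruction" above refers to the _normalize_local_path_args helper defined further down in app.py (its signature appears as context in a later hunk); its body is not part of this diff, so the following is only a rough sketch of what "file_path -> file_url (+ optional file_b64)" could mean in practice. The name normalize_path_args_sketch and the argument shape are illustrative, not the app's actual API.

import base64
from pathlib import Path

def normalize_path_args_sketch(args: dict, inline_b64: bool = False) -> dict:
    # Hypothetical illustration: turn a local file_path into a file:// URL
    # and optionally inline the file bytes as base64.
    out = dict(args)
    path = out.pop("file_path", None)
    if path:
        p = Path(path)
        out["file_url"] = p.as_uri()  # requires an absolute path, e.g. under /mnt/data/
        if inline_b64 and p.is_file():
            out["file_b64"] = base64.b64encode(p.read_bytes()).decode("ascii")
    return out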
@@ -39,7 +48,7 @@ try:
         CLIENT_SECRET,
         REFRESH_TOKEN,
         API_BASE,
-        LOCAL_MODEL,   # e.g. "deepseek
+        LOCAL_MODEL,   # e.g. "deepseek/deepseek-r1-7b" or "deepseek/deepseek-r1-0528:free"
     )
 except Exception as e:
     raise SystemExit(
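The config.py this block imports from is not included in the commit. A minimal sketch, using only the names visible in this diff and placeholder values (the real file may define more settings):

# config.py — illustrative placeholders only
CLIENT_SECRET = "your-client-secret"
REFRESH_TOKEN = "your-refresh-token"
API_BASE = "https://example.invalid/api"        # assumed shape, not taken from the repo
LOCAL_MODEL = "deepseek/deepseek-r1-0528:free"  # or "owner/repo" with no revision suffix
LOCAL_TOKENIZER = None                          # loader falls back to the model repo id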
@@ -125,16 +134,31 @@ def _normalize_local_path_args(args: Any) -> Any:
     return args
 
 # ----------------------------
-# DeepSeek / HF model loader
+# DeepSeek / HF model loader (with revision parsing)
 # ----------------------------
 LLM_PIPELINE = None
 TOKENIZER = None
 LOADED_MODEL_NAME = None
 
+def _parse_model_and_revision(model_string: str) -> Tuple[str, Optional[str]]:
+    """
+    Accepts model strings like:
+      - 'owner/repo'
+      - 'owner/repo:revision'
+    Returns (repo_id, revision_or_none).
+    """
+    if ":" in model_string:
+        repo_id, revision = model_string.split(":", 1)
+        repo_id = repo_id.strip()
+        revision = revision.strip()
+        return repo_id, revision
+    return model_string, None
+
 def init_deepseek_model():
     """
     Try to load LOCAL_MODEL via transformers.pipeline.
-    If
+    If a ':revision' is present, pass revision=... to from_pretrained to avoid HF repo-id validation errors.
+    If loading fails, try a fallback small model (flan-t5-small or distilgpt2).
     """
     global LLM_PIPELINE, TOKENIZER, LOADED_MODEL_NAME
 
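The split in _parse_model_and_revision is purely textual, so its behaviour follows directly from the code above:

_parse_model_and_revision("deepseek/deepseek-r1-0528:free")  # -> ("deepseek/deepseek-r1-0528", "free")
_parse_model_and_revision("google/flan-t5-small")            # -> ("google/flan-t5-small", None)

Whatever follows the colon is later passed to from_pretrained as revision=, so it must name a real branch, tag, or commit on the Hugging Face Hub for the download to succeed.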
@@ -149,30 +173,38 @@ def init_deepseek_model():
         return
 
     try:
-
-
-
+        model_string = LOCAL_MODEL
+        repo_id, revision = _parse_model_and_revision(model_string)
+        tokenizer_name = LOCAL_TOKENIZER or repo_id
+        model_name_for_logging = f"{repo_id}" + (f" (rev={revision})" if revision else "")
+        LOADED_MODEL_NAME = model_name_for_logging
 
         # If model looks like seq2seq (T5/flan) use text2text; else causal
         seq2seq_keywords = ["flan", "t5", "seq2seq"]
-        if any(k in
-
-
+        if any(k in repo_id.lower() for k in seq2seq_keywords):
+            if revision:
+                TOKENIZER = AutoTokenizer.from_pretrained(tokenizer_name, use_fast=True, revision=revision)
+                model = AutoModelForSeq2SeqLM.from_pretrained(repo_id, revision=revision)
+            else:
+                TOKENIZER = AutoTokenizer.from_pretrained(tokenizer_name, use_fast=True)
+                model = AutoModelForSeq2SeqLM.from_pretrained(repo_id)
             LLM_PIPELINE = pipeline("text2text-generation", model=model, tokenizer=TOKENIZER)
-            logger.info("Loaded seq2seq model: %s",
+            logger.info("Loaded seq2seq model: %s", model_name_for_logging)
         else:
-
-
+            if revision:
+                TOKENIZER = AutoTokenizer.from_pretrained(tokenizer_name, use_fast=True, revision=revision)
+                model = AutoModelForCausalLM.from_pretrained(repo_id, revision=revision)
+            else:
+                TOKENIZER = AutoTokenizer.from_pretrained(tokenizer_name, use_fast=True)
+                model = AutoModelForCausalLM.from_pretrained(repo_id)
             LLM_PIPELINE = pipeline("text-generation", model=model, tokenizer=TOKENIZER)
-            logger.info("Loaded causal model: %s",
+            logger.info("Loaded causal model: %s", model_name_for_logging)
 
     except Exception as e:
         logger.error("Failed to load requested model '%s': %s", LOCAL_MODEL, e)
         traceback.print_exc()
         # Try a small CPU-friendly fallback
-        fallback = None
         try:
-            # prefer an instruction-friendly small model if possible
             fallback = "google/flan-t5-small"
             if "flan" in fallback:
                 TOKENIZER = AutoTokenizer.from_pretrained(fallback, use_fast=True)
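The hunk ends on the first line of the fallback branch; the rest of that branch is unchanged and therefore not shown. As a standalone illustration of the same pipeline construction the code relies on, a sketch using the documented fallback checkpoint (google/flan-t5-small is a real, CPU-friendly seq2seq model, so this runs on its own):

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

tok = AutoTokenizer.from_pretrained("google/flan-t5-small", use_fast=True)
mdl = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small")
llm = pipeline("text2text-generation", model=mdl, tokenizer=tok)
print(llm("Translate English to German: Hello, world.", max_new_tokens=32)[0]["generated_text"])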