rudra0410hf committed · verified
Commit 92e4e8d · 1 Parent(s): 203435a

Update app.py

Files changed (1): app.py +20 -64
app.py CHANGED
@@ -1,13 +1,10 @@
-# app/main.py
-
 import os
 import time
 import logging
 from typing import Optional
-
-from fastapi import FastAPI, HTTPException, Query
+from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
-from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig
+from transformers import pipeline
 
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger("biogpt_chatbot")
@@ -20,7 +17,6 @@ MEDICAL_PROMPTS = {
 You are DermX-AI, a specialized medical AI assistant trained in dermatology.
 Your role is to provide clear, evidence-based information about skin conditions,
 diagnostic insights, and treatment options.
-
 - Use simple but professional language, suitable for both patients and clinicians.
 - When explaining, balance medical accuracy with user-friendly clarity.
 - For any uncertain or critical cases, clearly advise consultation with a dermatologist.
@@ -43,15 +39,15 @@ Please consult a dermatologist or qualified healthcare provider for personalized
 }
 
 # =========================
-# FASTAPI SETUP
+# REQUEST/RESPONSE MODELS
 # =========================
 class ChatRequest(BaseModel):
     question: str
     context: Optional[str] = None
-    mode: Optional[str] = "dermatology"  # "dermatology" | "general"
-    max_new_tokens: Optional[int] = None
-    temperature: Optional[float] = None
-    top_p: Optional[float] = None
+    mode: Optional[str] = "dermatology"  # dermatology | general
+    max_new_tokens: Optional[int] = 200
+    temperature: Optional[float] = 0.7
+    top_p: Optional[float] = 0.9
 
 class ChatResponse(BaseModel):
     answer: str
@@ -60,58 +56,29 @@ class ChatResponse(BaseModel):
     confidence: int
     sources: list
 
+# =========================
+# FASTAPI SETUP
+# =========================
 app = FastAPI(title="BioGPT-Large Medical Chatbot")
 
-MODEL_ID = os.environ.get("MODEL_ID", "microsoft/BioGPT-Large")
-MAX_NEW_TOKENS = int(os.environ.get("MAX_NEW_TOKENS", "200"))
-TEMPERATURE = float(os.environ.get("TEMPERATURE", "0.7"))
-TOP_P = float(os.environ.get("TOP_P", "0.9"))
-DEVICE = int(os.environ.get("DEVICE", "-1"))  # -1 = CPU
-USE_4BIT = os.environ.get("USE_4BIT", "false").lower() == "true"
-
+MODEL_ID = "microsoft/BioGPT-Large"
 generator = None
 
 @app.on_event("startup")
 def load_model():
     global generator
+    logger.info(f"Loading Hugging Face model via pipeline: {MODEL_ID}")
     try:
-        logger.info(f"Loading model: {MODEL_ID}")
-        if USE_4BIT:
-            bnb_config = BitsAndBytesConfig(
-                load_in_4bit=True,
-                bnb_4bit_quant_type="nf4",
-                bnb_4bit_compute_dtype="float16",
-                bnb_4bit_use_double_quant=True,
-            )
-            tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=False)
-            model = AutoModelForCausalLM.from_pretrained(
-                MODEL_ID,
-                quantization_config=bnb_config,
-                device_map="auto",
-                trust_remote_code=True,
-            )
-        else:
-            tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=False)
-            model = AutoModelForCausalLM.from_pretrained(MODEL_ID, trust_remote_code=True)
-
-        generator = pipeline(
-            "text-generation",
-            model=model,
-            tokenizer=tokenizer,
-            device=DEVICE,
-        )
+        # Use HF hosted model (CPU is fine, HF handles backend)
+        generator = pipeline("text-generation", model=MODEL_ID, device=-1)
         logger.info("Model loaded successfully.")
     except Exception as e:
-        logger.exception("Model loading failed")
+        logger.exception("Failed to load model")
         generator = None
 
 @app.get("/")
 def root():
-    return {
-        "status": "ok",
-        "model_loaded": _loaded_model is not None,
-        "model": _loaded_model,
-    }
+    return {"status": "ok", "model_loaded": generator is not None, "model": MODEL_ID}
 
 @app.post("/chat", response_model=ChatResponse)
 def chat(req: ChatRequest):
@@ -121,37 +88,26 @@ def chat(req: ChatRequest):
     if not req.question.strip():
         raise HTTPException(status_code=400, detail="Question cannot be empty")
 
-    # Select system prompt
+    # Build prompt
     mode = req.mode.lower() if req.mode else "dermatology"
     system_prompt = MEDICAL_PROMPTS.get(mode, MEDICAL_PROMPTS["general"])
-
-    # Build final prompt
     prompt = f"{system_prompt}\n\nUser Question: {req.question.strip()}\n\nAI Answer:"
     if req.context:
         prompt = req.context.strip() + "\n\n" + prompt
 
-    max_new = req.max_new_tokens or MAX_NEW_TOKENS
-    temp = req.temperature or TEMPERATURE
-    top_p = req.top_p or TOP_P
-
-    logger.info(f"Generating answer for: {req.question[:80]}...")
     t0 = time.time()
-
     try:
         outputs = generator(
             prompt,
-            max_new_tokens=max_new,
-            temperature=temp,
-            top_p=top_p,
+            max_new_tokens=req.max_new_tokens,
+            temperature=req.temperature,
+            top_p=req.top_p,
             do_sample=True,
             return_full_text=False,
             num_return_sequences=1,
         )
        answer = outputs[0]["generated_text"].strip()
-
-        # Always append disclaimer
         final_answer = f"{answer}\n\n{MEDICAL_PROMPTS['disclaimer']}"
-
         took = time.time() - t0
         confidence = min(95, 70 + int(len(answer) / 50))
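
For reference, below is a minimal client-side sketch of how the updated /chat endpoint could be exercised once the Space is running. It is an illustration, not part of the commit: the base URL and payload values are assumptions (7860 is the usual default port for a Hugging Face Space), while the field names match the ChatRequest/ChatResponse models in the diff above.

# Hypothetical client for the /chat endpoint; BASE_URL and payload values
# are assumptions for illustration only.
import requests

BASE_URL = "http://localhost:7860"  # assumed port; adjust to your deployment

# Optional health check: root endpoint reports whether the model loaded.
health = requests.get(f"{BASE_URL}/", timeout=30).json()
print(health["model_loaded"], health["model"])

payload = {
    "question": "What are common first-line treatments for mild acne?",
    "mode": "dermatology",       # dermatology | general
    "max_new_tokens": 150,       # optional; ChatRequest defaults to 200
    "temperature": 0.7,
    "top_p": 0.9,
}

resp = requests.post(f"{BASE_URL}/chat", json=payload, timeout=120)
resp.raise_for_status()
data = resp.json()
print(data["answer"])       # generated text with the disclaimer appended
print(data["confidence"])   # heuristic: min(95, 70 + len(answer) // 50),
                            # so a 1,000-character answer scores 70 + 20 = 90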