rudra0410hf committed · verified
Commit 92e4e8d · 1 Parent(s): 203435a

Update app.py

Files changed (1): app.py +20 -64
app.py CHANGED
@@ -1,13 +1,10 @@
-# app/main.py
-
 import os
 import time
 import logging
 from typing import Optional
-
-from fastapi import FastAPI, HTTPException, Query
+from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
-from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig
+from transformers import pipeline
 
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger("biogpt_chatbot")
@@ -20,7 +17,6 @@ MEDICAL_PROMPTS = {
 You are DermX-AI, a specialized medical AI assistant trained in dermatology.
 Your role is to provide clear, evidence-based information about skin conditions,
 diagnostic insights, and treatment options.
-
 - Use simple but professional language, suitable for both patients and clinicians.
 - When explaining, balance medical accuracy with user-friendly clarity.
 - For any uncertain or critical cases, clearly advise consultation with a dermatologist.
@@ -43,15 +39,15 @@ Please consult a dermatologist or qualified healthcare provider for personalized
 }
 
 # =========================
-# FASTAPI SETUP
+# REQUEST/RESPONSE MODELS
 # =========================
 class ChatRequest(BaseModel):
     question: str
     context: Optional[str] = None
-    mode: Optional[str] = "dermatology"  # "dermatology" | "general"
-    max_new_tokens: Optional[int] = None
-    temperature: Optional[float] = None
-    top_p: Optional[float] = None
+    mode: Optional[str] = "dermatology"  # dermatology | general
+    max_new_tokens: Optional[int] = 200
+    temperature: Optional[float] = 0.7
+    top_p: Optional[float] = 0.9
 
 class ChatResponse(BaseModel):
     answer: str
@@ -60,58 +56,29 @@ class ChatResponse(BaseModel):
     confidence: int
     sources: list
 
+# =========================
+# FASTAPI SETUP
+# =========================
 app = FastAPI(title="BioGPT-Large Medical Chatbot")
 
-MODEL_ID = os.environ.get("MODEL_ID", "microsoft/BioGPT-Large")
-MAX_NEW_TOKENS = int(os.environ.get("MAX_NEW_TOKENS", "200"))
-TEMPERATURE = float(os.environ.get("TEMPERATURE", "0.7"))
-TOP_P = float(os.environ.get("TOP_P", "0.9"))
-DEVICE = int(os.environ.get("DEVICE", "-1"))  # -1 = CPU
-USE_4BIT = os.environ.get("USE_4BIT", "false").lower() == "true"
-
+MODEL_ID = "microsoft/BioGPT-Large"
 generator = None
 
 @app.on_event("startup")
 def load_model():
     global generator
+    logger.info(f"Loading Hugging Face model via pipeline: {MODEL_ID}")
     try:
-        logger.info(f"Loading model: {MODEL_ID}")
-        if USE_4BIT:
-            bnb_config = BitsAndBytesConfig(
-                load_in_4bit=True,
-                bnb_4bit_quant_type="nf4",
-                bnb_4bit_compute_dtype="float16",
-                bnb_4bit_use_double_quant=True,
-            )
-            tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=False)
-            model = AutoModelForCausalLM.from_pretrained(
-                MODEL_ID,
-                quantization_config=bnb_config,
-                device_map="auto",
-                trust_remote_code=True,
-            )
-        else:
-            tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=False)
-            model = AutoModelForCausalLM.from_pretrained(MODEL_ID, trust_remote_code=True)
-
-        generator = pipeline(
-            "text-generation",
-            model=model,
-            tokenizer=tokenizer,
-            device=DEVICE,
-        )
+        # Use HF hosted model (CPU is fine, HF handles backend)
+        generator = pipeline("text-generation", model=MODEL_ID, device=-1)
         logger.info("Model loaded successfully.")
     except Exception as e:
-        logger.exception("Model loading failed")
+        logger.exception("Failed to load model")
         generator = None
 
 @app.get("/")
 def root():
-    return {
-        "status": "ok",
-        "model_loaded": _loaded_model is not None,
-        "model": _loaded_model,
-    }
+    return {"status": "ok", "model_loaded": generator is not None, "model": MODEL_ID}
 
 @app.post("/chat", response_model=ChatResponse)
 def chat(req: ChatRequest):
@@ -121,37 +88,26 @@ def chat(req: ChatRequest):
     if not req.question.strip():
         raise HTTPException(status_code=400, detail="Question cannot be empty")
 
-    # Select system prompt
+    # Build prompt
     mode = req.mode.lower() if req.mode else "dermatology"
     system_prompt = MEDICAL_PROMPTS.get(mode, MEDICAL_PROMPTS["general"])
-
-    # Build final prompt
     prompt = f"{system_prompt}\n\nUser Question: {req.question.strip()}\n\nAI Answer:"
     if req.context:
         prompt = req.context.strip() + "\n\n" + prompt
 
-    max_new = req.max_new_tokens or MAX_NEW_TOKENS
-    temp = req.temperature or TEMPERATURE
-    top_p = req.top_p or TOP_P
-
-    logger.info(f"Generating answer for: {req.question[:80]}...")
     t0 = time.time()
-
     try:
         outputs = generator(
             prompt,
-            max_new_tokens=max_new,
-            temperature=temp,
-            top_p=top_p,
+            max_new_tokens=req.max_new_tokens,
+            temperature=req.temperature,
+            top_p=req.top_p,
             do_sample=True,
             return_full_text=False,
             num_return_sequences=1,
         )
        answer = outputs[0]["generated_text"].strip()
-
-        # Always append disclaimer
         final_answer = f"{answer}\n\n{MEDICAL_PROMPTS['disclaimer']}"
-
         took = time.time() - t0
         confidence = min(95, 70 + int(len(answer) / 50))
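
For reference, below is a minimal client-side sketch of how the updated /chat endpoint could be exercised once the Space is running. It is an illustration, not part of the commit: the base URL and payload values are assumptions (7860 is the usual default port for a Hugging Face Space), while the field names match the ChatRequest/ChatResponse models in the diff above.

# Hypothetical client for the /chat endpoint; BASE_URL and payload values
# are assumptions for illustration only.
import requests

BASE_URL = "http://localhost:7860"  # assumed port; adjust to your deployment

# Optional health check: root endpoint reports whether the model loaded.
health = requests.get(f"{BASE_URL}/", timeout=30).json()
print(health["model_loaded"], health["model"])

payload = {
    "question": "What are common first-line treatments for mild acne?",
    "mode": "dermatology",       # dermatology | general
    "max_new_tokens": 150,       # optional; ChatRequest defaults to 200
    "temperature": 0.7,
    "top_p": 0.9,
}

resp = requests.post(f"{BASE_URL}/chat", json=payload, timeout=120)
resp.raise_for_status()
data = resp.json()
print(data["answer"])       # generated text with the disclaimer appended
print(data["confidence"])   # heuristic: min(95, 70 + len(answer) // 50),
                            # so a 1,000-character answer scores 70 + 20 = 90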