sumoy47 committed
Commit 49945fa · verified · 1 Parent(s): 92f3c49

Upload 10 files

Dockerfile ADDED
@@ -0,0 +1,19 @@
+ # Use the official Python 3.9 slim image
+ FROM python:3.9-slim
+
+ # Set the working directory
+ WORKDIR /code
+
+ # Copy requirements first so this layer is cached when only code changes
+ COPY ./requirements.txt /code/requirements.txt
+
+ # Install dependencies
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+
+ # Copy the model directory and application code
+ # NOTE: the model must be unzipped into a folder named 'model'
+ COPY ./model /code/model
+ COPY ./main.py /code/main.py
+
+ # Run the app (port 7860 is what a Hugging Face Docker Space expects)
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
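
For local testing, a minimal sketch of building and running this image (the tag `medguard` is illustrative, not part of the repo): `docker build -t medguard .` followed by `docker run -p 7860:7860 medguard`, after which the API answers on http://localhost:7860/.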
main.py ADDED
@@ -0,0 +1,101 @@
+ from fastapi import FastAPI, HTTPException
+ from fastapi.middleware.cors import CORSMiddleware
+ from pydantic import BaseModel
+ from typing import Optional
+ import torch
+ import torch.nn.functional as F
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
+ from lime.lime_text import LimeTextExplainer
+ import numpy as np
+
+ app = FastAPI(title="MedGuard API")
+
+ # --- CORS (wide open for debugging) ---
+ # Allowing every origin rules out CORS as the cause of client-side errors;
+ # tighten allow_origins before production use.
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+ # --- CONFIGURATION ---
+ MODEL_PATH = "./model"
+ DEVICE = "cpu"
+
+ print(f"🔄 Loading model from {MODEL_PATH}...")
+ model = None
+ tokenizer = None
+
+ try:
+     tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
+     model = AutoModelForSequenceClassification.from_pretrained(MODEL_PATH)
+     model.to(DEVICE)
+     model.eval()
+     print("✅ Model loaded successfully!")
+ except Exception as e:
+     print(f"❌ Error loading local model: {e}")
+     # Fall back to the base checkpoint so the API still starts (classifier head is untrained)
+     MODEL_NAME = "csebuetnlp/banglabert"
+     tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+     model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=3)
+     model.to(DEVICE)
+     model.eval()
+
+ # --- DATA MODELS ---
+ class QueryRequest(BaseModel):
+     text: str
+
+ class PredictionResponse(BaseModel):
+     label: str
+     confidence: float
+     probs: dict
+     explanation: Optional[list] = None
+
+ LABELS = ["Highly Relevant", "Partially Relevant", "Not Relevant"]
+
+ def predict_proba_lime(texts):
+     # Batch probability function in the shape LIME expects: (n_samples, n_classes)
+     inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True, max_length=128).to(DEVICE)
+     with torch.no_grad():
+         outputs = model(**inputs)
+     return F.softmax(outputs.logits, dim=-1).cpu().numpy()
+
+ @app.get("/")
+ def health_check():
+     return {"status": "active", "model": "MedGuard v1.0"}
+
+ @app.post("/predict", response_model=PredictionResponse)
+ def predict(request: QueryRequest):
+     if model is None or tokenizer is None:
+         raise HTTPException(status_code=503, detail="Model not loaded")
+
+     try:
+         inputs = tokenizer(request.text, return_tensors="pt", truncation=True, max_length=128).to(DEVICE)
+         with torch.no_grad():
+             outputs = model(**inputs)
+         probs = F.softmax(outputs.logits, dim=-1).cpu().numpy()[0]
+
+         pred_idx = int(np.argmax(probs))
+
+         # LIME explanation: num_samples=20 keeps latency low at the cost of noisier attributions
+         explainer = LimeTextExplainer(class_names=LABELS, split_expression=lambda x: x.split())
+         exp = explainer.explain_instance(request.text, predict_proba_lime, num_features=6, num_samples=20, labels=[pred_idx])
+         lime_features = exp.as_list(label=pred_idx)
+
+         return {
+             "label": LABELS[pred_idx],
+             "confidence": round(float(probs[pred_idx]) * 100, 2),
+             "probs": {l: round(float(p), 4) for l, p in zip(LABELS, probs)},
+             "explanation": lime_features,
+         }
+     except Exception as e:
+         print(f"Server error: {e}")  # Log the error to the backend terminal
+         raise HTTPException(status_code=500, detail=str(e))
+
+ if __name__ == "__main__":
+     import uvicorn
+     # Local development entry point; the Docker image runs uvicorn on 0.0.0.0:7860 instead
+     uvicorn.run(app, host="127.0.0.1", port=8000)
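
Once the container is running, the endpoint can be exercised with a short client script. A minimal sketch, assuming the service listens on localhost:7860 as in the Dockerfile above; `requests` is a client-side dependency (it is not in requirements.txt), and the sample text is illustrative:

import requests

payload = {"text": "sample medical query"}  # illustrative input
# LIME runs 20 forward passes on CPU per request, so allow a generous timeout
resp = requests.post("http://localhost:7860/predict", json=payload, timeout=120)
resp.raise_for_status()
data = resp.json()
print(data["label"], data["confidence"])    # predicted class and confidence in percent
for token, weight in data["explanation"]:   # LIME (token, weight) pairs, largest |weight| first
    print(f"{token}: {weight:+.4f}")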
model/config.json ADDED
@@ -0,0 +1,40 @@
+ {
+   "architectures": [
+     "ElectraForSequenceClassification"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "classifier_dropout": null,
+   "dtype": "float32",
+   "embedding_size": 768,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "id2label": {
+     "0": "LABEL_0",
+     "1": "LABEL_1",
+     "2": "LABEL_2"
+   },
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "label2id": {
+     "LABEL_0": 0,
+     "LABEL_1": 1,
+     "LABEL_2": 2
+   },
+   "layer_norm_eps": 1e-12,
+   "max_position_embeddings": 512,
+   "model_type": "electra",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "pad_token_id": 0,
+   "position_embedding_type": "absolute",
+   "problem_type": "single_label_classification",
+   "summary_activation": "gelu",
+   "summary_last_dropout": 0.1,
+   "summary_type": "first",
+   "summary_use_proj": true,
+   "transformers_version": "4.57.1",
+   "type_vocab_size": 2,
+   "use_cache": true,
+   "vocab_size": 32000
+ }
model/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8c629f3b73a705b26cccfea07913b6b6660658e6bf97d38e7d445d58d4739925
+ size 442502748
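
Note: model/model.safetensors (and training_args.bin below) is stored as a Git LFS pointer; cloning without git-lfs fetches only this small pointer file rather than the ~442 MB weights, in which case main.py would fall back to the base csebuetnlp/banglabert checkpoint.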
model/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "cls_token": "[CLS]",
+   "mask_token": "[MASK]",
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "unk_token": "[UNK]"
+ }
model/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
model/tokenizer_config.json ADDED
@@ -0,0 +1,59 @@
+ {
+   "added_tokens_decoder": {
+     "0": {
+       "content": "[PAD]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "[UNK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "[CLS]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "3": {
+       "content": "[SEP]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "4": {
+       "content": "[MASK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "clean_up_tokenization_spaces": true,
+   "cls_token": "[CLS]",
+   "do_basic_tokenize": true,
+   "do_lower_case": false,
+   "extra_special_tokens": {},
+   "full_tokenizer_file": null,
+   "mask_token": "[MASK]",
+   "model_max_length": 1000000000000000019884624838656,
+   "never_split": null,
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "strip_accents": null,
+   "tokenize_chinese_chars": false,
+   "tokenizer_class": "ElectraTokenizer",
+   "unk_token": "[UNK]"
+ }
model/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:724b5929cc16aead90319cdfae62b336b875ed296fef079a6838ca0e3a355469
+ size 5841
model/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ fastapi
+ uvicorn
+ torch
+ transformers
+ numpy
+ lime
+ pydantic
+ python-multipart
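
These requirements are unpinned, so image builds may drift as upstream packages release new versions. If reproducibility matters, pinning is a reasonable hardening step; the only version grounded in this commit is `transformers==4.57.1`, matching the "transformers_version" recorded in model/config.json, and the remaining pins should come from a known-good `pip freeze`.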