File size: 1,722 Bytes
ab57b6a
 
 
7a074ab
8d89728
 
56f1c8f
dc75edf
 
7a074ab
 
dc75edf
ab57b6a
56f1c8f
 
 
 
 
 
 
 
 
ab57b6a
56f1c8f
 
 
 
 
dc75edf
56f1c8f
 
dc75edf
ab57b6a
56f1c8f
7a074ab
ab57b6a
56f1c8f
 
ab57b6a
56f1c8f
 
ab57b6a
dc75edf
 
56f1c8f
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import gradio as gr
import joblib, numpy as np, torch, transformers

# ── load artefacts once ───────────────────────────────────────────────
# Module-level loading: these run once at import time so every request
# reuses the same model objects (required for Gradio, which imports the
# module and serves `predict` concurrently).
rf       = joblib.load("models/RandomForest_tfidf_pipe.joblib")   # sklearn pipeline/classifier
thr      = np.load("models/RandomForest_tfidf_thresh.npy")        # per-label decision thresholds; presumably shaped like rf's label axis — TODO confirm
mlb      = joblib.load("models/mlb.joblib")                       # MultiLabelBinarizer; .classes_ maps column index → label name

# Embedding model used to vectorize incoming text before classification.
# .eval() disables dropout/batch-norm updates for deterministic inference.
# NOTE(review): trust_remote_code executes code shipped with the model repo —
# acceptable only because the model source is pinned/trusted.
embedder  = transformers.AutoModel.from_pretrained(
    "BAAI/bge-m3", trust_remote_code=True
).eval()
tokenizer = transformers.AutoTokenizer.from_pretrained("BAAI/bge-m3")

def predict(payload: dict) -> dict:
    """Classify a JSON payload into drill categories.

    Parameters
    ----------
    payload : dict
        Key→value pairs; all values are stringified and concatenated
        (in dict insertion order) into one text blob.

    Returns
    -------
    dict
        ``{"labels": [...], "scores": [...]}`` — the label names whose
        probability met that label's threshold, with their probabilities
        rounded to 4 decimals. Empty lists for an empty/missing payload.
    """
    # Gradio's JSON input delivers None when the box is cleared; the
    # original code crashed here with AttributeError on .values().
    if not payload:
        return {"labels": [], "scores": []}

    # 1. concatenate all values into one string
    text = " ".join(str(v) for v in payload.values())

    # 2. embed + classify (inference_mode: no autograd bookkeeping)
    with torch.inference_mode():
        inputs = tokenizer(text, return_tensors="pt", truncation=True)
        # NOTE(review): assumes the model exposes pooler_output — TODO
        # confirm for BGE-M3; CLS pooling via last_hidden_state[:, 0]
        # is the common alternative.
        vec = embedder(**inputs).pooler_output.numpy()
    proba = rf.predict_proba(vec)[0]
    mask  = proba >= thr  # per-label thresholds loaded at import time

    return {
        "labels": mlb.classes_[mask].tolist(),
        "scores": proba[mask].round(4).tolist(),
    }

# ── Gradio needs exactly this global var at import time ────────────────
# `demo` must exist at module level: `gradio app.py` / Spaces looks for it.
demo = gr.Interface(
    fn=predict,
    inputs=gr.JSON(label="Input JSON payload"),     # raw JSON object in, dict delivered to fn
    outputs=gr.JSON(label="Predicted labels & scores"),
    title="Drill-Category Classifier",
    description="Send a JSON object; its values will be concatenated and classified.",
    api_name="predict"                              # exposes POST /api/predict for programmatic calls
)

# Script entry point: only start the server when run directly, not when
# imported (e.g. by the Gradio/Spaces runtime, which launches `demo` itself).
if __name__ == "__main__":
    demo.queue()   # optional: enables concurrency & progress events
    demo.launch()  # blocks, serving the interface on the default port