FlameF0X committed on
Commit
1c79012
·
verified ·
1 Parent(s): 6e7b438

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -0
app.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from typing import Optional

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM  # or your model class
4
+
5
# --- Model setup -------------------------------------------------------
# Fetch the tokenizer and causal-LM weights from the Hugging Face Hub.
model_name = "FlameF0X/i3-80m"  # replace with correct HF model path
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Inference only: .eval() disables dropout/batch-norm updates and returns
# the module, so the load and the mode switch chain into one statement.
model = AutoModelForCausalLM.from_pretrained(model_name).eval()

# Move to GPU when one is present; otherwise stay on CPU.
model = model.cuda() if torch.cuda.is_available() else model
12
+
13
def generate(prompt: str, max_new_tokens: int = 100, temperature: float = 1.0, top_k: Optional[int] = None):
    """Generate a sampled continuation of *prompt* with the loaded model.

    Args:
        prompt: Input text to continue.
        max_new_tokens: Upper bound on the number of tokens to sample.
        temperature: Softmax temperature used for sampling.
        top_k: Top-k logits filter. The UI slider sends 0 to mean
            "disabled"; NOTE(review): with ``None`` transformers falls back
            to the model's GenerationConfig default rather than disabling
            the filter — confirm against the installed transformers version.

    Returns:
        The decoded generation (prompt included) as a plain string.
    """
    # Keep the full tokenizer output (input_ids AND attention_mask):
    # calling generate() with bare input_ids makes transformers infer the
    # mask itself, which emits a warning and can be wrong with padding.
    inputs = tokenizer(prompt, return_tensors="pt")
    if torch.cuda.is_available():
        # Move every tensor in the encoding, not just input_ids, so the
        # attention mask lands on the same device as the model.
        inputs = {name: tensor.cuda() for name, tensor in inputs.items()}
    with torch.no_grad():  # inference only — skip autograd bookkeeping
        output_ids = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            top_k=top_k,
            do_sample=True,
        )
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)
28
+
29
# --- Gradio UI ---------------------------------------------------------
# Build each input component up front, then wire them into the Interface.
prompt_box = gr.Textbox(label="Prompt", lines=2, placeholder="Enter prompt here...")
max_tokens_slider = gr.Slider(label="Max new tokens", minimum=1, maximum=500, step=1, value=100)
temperature_slider = gr.Slider(label="Temperature", minimum=0.1, maximum=2.0, step=0.1, value=1.0)
top_k_slider = gr.Slider(label="Top-k (0 = disabled)", minimum=0, maximum=200, step=1, value=40)

iface = gr.Interface(
    fn=generate,
    inputs=[prompt_box, max_tokens_slider, temperature_slider, top_k_slider],
    outputs=gr.Textbox(label="Generated Text"),
    title="i3-80m Generation Demo",
    description="Interact with the i3 hybrid-architecture model.",
)

# Bind to all interfaces on port 7860 (the conventional Spaces port).
iface.launch(server_name="0.0.0.0", server_port=7860)