import gradio as gr
import torch
from transformers import GPT2Tokenizer, AutoModelForCausalLM
from peft import PeftModel

# 1️⃣ Load fallback tokenizer (GPT-2)
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token  # GPT-2 has no pad token; reuse EOS for padding

# 2️⃣ Load base model (fp16 on GPU, fp32 on CPU)
base_model_name = "TRM-coding/PythonCopilot"
device = "cuda" if torch.cuda.is_available() else "cpu"
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
).to(device)
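
# Optional alternative (assumes the `accelerate` package is installed): let
# transformers place the weights automatically instead of a manual .to(device):
# base_model = AutoModelForCausalLM.from_pretrained(base_model_name, device_map="auto")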

# 3️⃣ Resize embeddings to match PEFT checkpoint vocab
checkpoint_vocab_size = 50257  # GPT-2 vocab size, from the DSDUDEd/funfox PEFT checkpoint
base_model.resize_token_embeddings(checkpoint_vocab_size)
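# Sanity check (added): the GPT-2 tokenizer should match the checkpoint vocab,
# otherwise generated token ids could index past the embedding table.
assert len(tokenizer) == checkpoint_vocab_size, "tokenizer/checkpoint vocab size mismatch"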

# 4️⃣ Load PEFT/LoRA adapter on top of the base model
peft_model_name = "DSDUDEd/funfox"
model = PeftModel.from_pretrained(base_model, peft_model_name)
model.eval()
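
# Optional: merging the adapter into the base weights can speed up inference
# (a sketch using PEFT's merge_and_unload(); the merged model can no longer
# swap or further train adapters):
# model = model.merge_and_unload()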

# 5️⃣ Define generation function
def generate_text(prompt, max_tokens=50):
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=int(max_tokens),  # Gradio sliders may pass floats
        do_sample=True,
        top_p=0.9,
        temperature=0.8,
        pad_token_id=tokenizer.eos_token_id,  # avoids the missing-pad-token warning
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
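
# Quick local smoke test (hypothetical prompt, uncomment to try):
# print(generate_text("def fibonacci(n):", max_tokens=60))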

# 6️⃣ Build Gradio interface
iface = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Textbox(label="Enter Prompt", lines=2, placeholder="Type something..."),
        gr.Slider(minimum=10, maximum=200, value=50, step=10, label="Max Tokens"),
    ],
    outputs=gr.Textbox(label="Generated Text"),
    title="FunFox PEFT Model",
    description="FunFox LoRA adapter fine-tuned on the PythonCopilot base model.",
)

# 7️⃣ Launch (share=True requests a public link when run locally; Spaces ignores it)
iface.launch(share=True)