import gradio as gr
import torch
from peft import PeftModel
from transformers import GPT2Tokenizer, AutoModelForCausalLM

# Stock GPT-2 tokenizer from the Hub, shared by every generate_text() call.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
# Reuse EOS as the pad token so the tokenizer has one defined.
tokenizer.pad_token = tokenizer.eos_token

# Hub id of the causal LM that the adapter is later applied on top of.
base_model_name = "TRM-coding/PythonCopilot"

# Run on the GPU when torch can see one; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Half precision is only requested on CUDA; the CPU path keeps float32.
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
).to(device)

# Size the base model's token embeddings are resized to before the adapter
# is attached.
# NOTE(review): presumably the vocabulary size the adapter checkpoint was
# trained with -- confirm against the adapter repository.
checkpoint_vocab_size = 50257
base_model.resize_token_embeddings(checkpoint_vocab_size)

# Adapter repository that is loaded on top of base_model.
peft_model_name = "DSDUDEd/funfox"
model = PeftModel.from_pretrained(base_model, peft_model_name)
# The app only runs inference, so put the wrapped model in eval mode.
model.eval()


def generate_text(prompt, max_tokens=50):
    """Generate a sampled continuation of ``prompt`` with the PEFT model.

    Args:
        prompt: Text to continue. An empty (or ``None``) prompt returns an
            empty string without calling the model.
        max_tokens: Upper bound on the number of newly generated tokens.
            Coerced with ``int()`` because UI widgets may deliver a float.

    Returns:
        The decoded text of the first returned sequence, with special
        tokens removed.
    """
    # An empty prompt gives the model no input ids to extend; short-circuit
    # instead of surfacing a tensor-shape error from generate().
    if not prompt:
        return ""

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=int(max_tokens),
        do_sample=True,
        top_p=0.9,
        temperature=0.8,
        # Name the pad id explicitly (EOS) rather than relying on
        # generate()'s per-call fallback.
        pad_token_id=tokenizer.eos_token_id,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


# Gradio UI: a prompt box and a token-budget slider wired to generate_text.
iface = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Textbox(label="Enter Prompt", lines=2, placeholder="Type something..."),
        # Start at 50 to match generate_text's own default; without `value`
        # the slider would start at its minimum.
        gr.Slider(minimum=10, maximum=200, step=10, value=50, label="Max Tokens"),
    ],
    outputs=gr.Textbox(label="Generated Text"),
    title="FunFox PEFT Model",
    description="FunFox LoRA model fine-tuned on PythonCopilot base.",
)

# Start the app; share=True is forwarded to Gradio's launch().
iface.launch(share=True)