|
|
|
|
|
|
|
|
|
|
|
import os |
|
|
import torch |
|
|
import gradio as gr |
|
|
from unsloth import FastLanguageModel |
|
|
from trl import SFTTrainer |
|
|
from transformers import TrainingArguments |
|
|
from datasets import load_dataset |
|
|
|
|
|
|
|
|
# Maximum context length (in tokens) requested when loading the model.
max_seq_length = 2048

# Local JSON-lines file handed to the `datasets` JSON loader.
dataset_path = "dataset.jsonl"

# NOTE(review): `dataset` is not referenced by the inference/UI code visible
# in this file; it is still loaded so the module keeps exposing the same name.
dataset = load_dataset("json", data_files=dataset_path)

# Load the base Llama 3.1 8B checkpoint and its tokenizer through Unsloth.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Meta-Llama-3.1-8B",
    max_seq_length=max_seq_length,
    dtype=None,  # no explicit dtype; the choice is left to Unsloth
    load_in_4bit=True,  # 4-bit quantized weights to reduce memory use
)

# Switch the model to inference mode. The helper is called for its side
# effect and `model` is deliberately NOT re-bound to the return value: the
# documented usage is a bare call, and re-binding would replace `model` with
# None on Unsloth releases where the helper returns nothing.
FastLanguageModel.for_inference(model)
|
|
|
|
|
|
|
|
def generate_response(user_input, max_new_tokens=256):
    """Generate a model reply for a single user message.

    Args:
        user_input: Raw text typed by the user.
        max_new_tokens: Upper bound on the number of tokens generated after
            the prompt. Without an explicit bound, generation falls back to
            the library's short default length and replies get cut off.

    Returns:
        The decoded text of the first (and only) generated sequence, with
        special tokens removed. The token sequence returned by
        ``model.generate`` is decoded as-is, so for a decoder-only model the
        text is expected to start with the prompt itself.
    """
    # Place the encoded prompt on the same device as the model; sending it to
    # a hard-coded "cpu" mismatches a model loaded onto an accelerator.
    encoded = tokenizer(user_input, return_tensors="pt").to(model.device)
    generated = model.generate(**encoded, max_new_tokens=max_new_tokens)
    # skip_special_tokens keeps markers such as the begin-of-text token out
    # of the text shown to the user.
    return tokenizer.batch_decode(generated, skip_special_tokens=True)[0]
|
|
|
|
|
|
|
|
# Single-textbox web UI: the user's message goes to generate_response and the
# returned string is shown in the output box.
# `gr.Textbox` is used for both components because the legacy `gr.inputs` /
# `gr.outputs` namespaces are deprecated in Gradio 3 and absent in Gradio 4.
iface = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(label="User Input"),
    outputs=gr.Textbox(label="Bot Response"),
    title="Chatbot with Llama 3.1",
    description="A chatbot powered by the Llama 3.1 model. Type your message below.",
)


# Start the Gradio server only when the file is executed as a script.
if __name__ == "__main__":
    iface.launch()