# app.py
# -*- coding: utf-8 -*-
import gradio as gr
from datasets import load_dataset
from unsloth import FastLanguageModel

# Model configuration
max_seq_length = 2048

# Load the fine-tuning dataset (not used at inference time; see the
# optional fine-tuning sketch at the bottom of this file)
dataset_path = "dataset.jsonl"  # Update this path as needed
dataset = load_dataset("json", data_files=dataset_path)

# Load the model and tokenizer (4-bit quantized to reduce VRAM usage)
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Meta-Llama-3.1-8B",
    max_seq_length=max_seq_length,
    dtype=None,  # Auto-detect: float16 on older GPUs, bfloat16 on Ampere+
    load_in_4bit=True,
)

# Prepare the model for inference (enables Unsloth's fast generation path)
model = FastLanguageModel.for_inference(model)

# Function to generate text
def generate_response(user_input):
    # Keep the inputs on the same device as the model: a 4-bit model is
    # loaded onto the GPU, so moving tensors to the CPU would cause a
    # device mismatch at generation time
    inputs = tokenizer(user_input, return_tensors="pt").to(model.device)
    # max_new_tokens is an assumed cap; without it, generate() falls back
    # to a very short default length. Adjust as needed.
    output = model.generate(**inputs, max_new_tokens=256)
    return tokenizer.batch_decode(output, skip_special_tokens=True)[0]

# Create the Gradio interface. The deprecated gr.inputs/gr.outputs
# namespaces were removed in Gradio 4; components are passed directly.
iface = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(label="User Input"),
    outputs=gr.Textbox(label="Bot Response"),
    title="Chatbot with Llama 3.1",
    description="A chatbot powered by the Llama 3.1 model. Type your message below.",
)

# Launch the Gradio app
if __name__ == "__main__":
    iface.launch()
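# ---------------------------------------------------------------------------
# Optional: fine-tuning sketch.
# The original script imported SFTTrainer and TrainingArguments and loaded
# dataset.jsonl without using either, which suggests a fine-tuning step was
# intended. Below is a minimal, commented-out sketch of how that dataset
# could be used to fine-tune the model before serving it. The "text" field
# name, the LoRA settings, and all hyperparameters are assumptions, not part
# of the original script. Note that this would have to run *before* the
# FastLanguageModel.for_inference(model) call above, and the model must
# first be wrapped with LoRA adapters via get_peft_model.
#
# from trl import SFTTrainer
# from transformers import TrainingArguments
#
# model = FastLanguageModel.get_peft_model(model, r=16)  # assumed LoRA rank
# trainer = SFTTrainer(
#     model=model,
#     tokenizer=tokenizer,
#     train_dataset=dataset["train"],
#     dataset_text_field="text",  # assumes each JSONL record has a "text" key
#     max_seq_length=max_seq_length,
#     args=TrainingArguments(
#         per_device_train_batch_size=2,
#         num_train_epochs=1,
#         output_dir="outputs",
#     ),
# )
# trainer.train()
# ---------------------------------------------------------------------------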