Nithish3115 committed
Commit 377b4e3 · verified · 1 parent: 105046d

Update app.py

Files changed (1):
  1. app.py +217 -272
app.py CHANGED
@@ -1,296 +1,241 @@
- # import os
- # import gradio as gr
- # from transformers import AutoModelForCausalLM, AutoTokenizer
- # import torch
-
- # # Define paths for storage - avoid persistent folder issues
- # MODEL_CACHE_DIR = "./model_cache"
- # HF_HOME_DIR = "./hf_home"
- # TRANSFORMERS_CACHE_DIR = "./transformers_cache"
-
- # # Set environment variables
- # os.environ["HF_HOME"] = HF_HOME_DIR
- # os.environ["TRANSFORMERS_CACHE"] = TRANSFORMERS_CACHE_DIR
-
- # # Create cache directories if they don't exist
- # os.makedirs(MODEL_CACHE_DIR, exist_ok=True)
- # os.makedirs(HF_HOME_DIR, exist_ok=True)
- # os.makedirs(TRANSFORMERS_CACHE_DIR, exist_ok=True)
-
- # # Initialize the model and tokenizer - only when explicitly requested
- # def initialize_model():
- #     print("Loading model and tokenizer... This may take a few minutes.")

- #     try:
- #         # Load the tokenizer
- #         tokenizer = AutoTokenizer.from_pretrained(
- #             "abhinand/tamil-llama-7b-instruct-v0.2",
- #             cache_dir=MODEL_CACHE_DIR
- #         )
-
- #         # CPU-friendly configuration
- #         model = AutoModelForCausalLM.from_pretrained(
- #             "abhinand/tamil-llama-7b-instruct-v0.2",
- #             device_map="auto",
- #             torch_dtype="auto",
- #             low_cpu_mem_usage=True,
- #             cache_dir=MODEL_CACHE_DIR
- #         )
-
- #         print("Model and tokenizer loaded successfully!")
- #         return model, tokenizer
- #     except Exception as e:
- #         print(f"Error loading model: {e}")
- #         return None, None
-
- # # Generate response
- # def generate_response(model, tokenizer, user_input, chat_history, temperature=0.2, top_p=1.0, top_k=40):
- #     # Check if model and tokenizer are loaded
- #     if model is None or tokenizer is None:
- #         return "மாதிரி ஏற்றப்படவில்லை. 'மாதிரியை ஏற்று' பொத்தானைக் கிளிக் செய்யவும்."  # Model not loaded, please click 'Load Model' button

- #     # System message for the Tamil LLaMA model
- #     system_message = "You are a helpful assistant that provides accurate information in Tamil language."

- #     # Create the prompt using the template from documentation
- #     prompt_template = f"<|im_start|>system\n{system_message}<|im_end|>\n"

- #     # Process conversation history - chat_history format is list of tuples [(user_msg, bot_msg), ...]
- #     if chat_history:
- #         for user_msg, bot_msg in chat_history:
- #             if user_msg and bot_msg:  # Ensure both messages exist
- #                 prompt_template += f"<|im_start|>user\n{user_msg}<|im_end|>\n"
- #                 prompt_template += f"<|im_start|>assistant\n{bot_msg}<|im_end|>\n"

- #     # Add the current user message
- #     prompt_template += f"<|im_start|>user\n{user_input}<|im_end|>\n"
- #     prompt_template += "<|im_start|>assistant\n"

- #     try:
- #         # Tokenize input
- #         inputs = tokenizer(prompt_template, return_tensors="pt", padding=True)
-
- #         # Generate response with user-specified parameters
- #         with torch.no_grad():
- #             output = model.generate(
- #                 inputs["input_ids"],
- #                 attention_mask=inputs["attention_mask"],
- #                 max_new_tokens=256,
- #                 do_sample=True,
- #                 temperature=temperature,
- #                 top_p=top_p,
- #                 top_k=top_k,
- #                 pad_token_id=tokenizer.eos_token_id,
- #                 eos_token_id=tokenizer.encode("<|im_end|>", add_special_tokens=False)[0] if "<|im_end|>" in tokenizer.get_vocab() else tokenizer.eos_token_id
- #             )
-
- #         # Decode output
- #         generated_text = tokenizer.decode(output[0], skip_special_tokens=False)
-
- #         # Extract just the assistant's response
- #         response_parts = generated_text.split("<|im_start|>assistant\n")
- #         if len(response_parts) > 1:
- #             assistant_response = response_parts[-1].split("<|im_end|>")[0].strip()
- #         else:
- #             # Fallback extraction
- #             assistant_response = generated_text[len(prompt_template):].strip()
- #             if "<|im_end|>" in assistant_response:
- #                 assistant_response = assistant_response.split("<|im_end|>")[0].strip()

- #         return assistant_response
- #     except Exception as e:
- #         print(f"Error generating response: {e}")
- #         return f"பிழை ஏற்பட்டது. மீண்டும் முயற்சிக்கவும்."  # Error occurred, please try again
-
- # # Function to vote/like a response
- # def vote(data, vote_type, model_name):
- #     # This is a placeholder for the voting functionality
- #     print(f"Received {vote_type} for response: {data}")
- #     return data
-
- # # Create the Gradio interface
- # def create_chatbot_interface():
- #     with gr.Blocks(css="css/index.css") as demo:
- #         title = "# தமிழ் உரையாடல் பொத்தான் (Tamil Chatbot)"
- #         description = "Tamil LLaMA 7B Instruct model with user-controlled generation parameters."
-
- #         gr.Markdown(title)
- #         gr.Markdown(description)
-
- #         # Model loading indicator
- #         with gr.Row():
- #             model_status = gr.Markdown("⚠️ மாதிரி ஏற்றப்படவில்லை (Model not loaded)")
- #             load_model_btn = gr.Button("மாதிரியை ஏற்று (Load Model)")
-
- #         # Model and tokenizer states
- #         model = gr.State(None)
- #         tokenizer = gr.State(None)
-
- #         # Parameter sliders
- #         with gr.Accordion("Generation Parameters", open=False):
- #             temperature = gr.Slider(
- #                 label="temperature",
- #                 value=0.2,
- #                 minimum=0.0,
- #                 maximum=2.0,
- #                 step=0.05,
- #                 interactive=True,
- #                 info="Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic."
- #             )

- #             top_p = gr.Slider(
- #                 label="top_p",
- #                 value=1.0,
- #                 minimum=0.0,
- #                 maximum=1.0,
- #                 step=0.01,
- #                 interactive=True,
- #                 info="0.1 means only the tokens comprising the top 10% probability mass are considered. Suggest set to 1 and use temperature. 1 means 100% and will disable it"
- #             )

- #             top_k = gr.Slider(
- #                 label="top_k",
- #                 value=40,
- #                 minimum=0,
- #                 maximum=1000,
- #                 step=1,
- #                 interactive=True,
- #                 info="limits candidate tokens to a fixed number after sorting by probability. Setting it higher than the vocabulary size deactivates this limit."
- #             )
-
- #         # Function to load model on button click
- #         def load_model_fn():
- #             m, t = initialize_model()
- #             if m is not None and t is not None:
- #                 return "✅ மாதிரி வெற்றிகரமாக ஏற்றப்பட்டது (Model loaded successfully)", m, t
- #             else:
- #                 return "❌ மாதிரி ஏற்றுவதில் பிழை (Error loading model)", None, None
-
- #         # Function to respond to user messages - fixed to handle tuples format
- #         def chat_function(message, history, model_state, tokenizer_state, temp, tp, tk):
- #             # Check if model is loaded
- #             if model_state is None:
- #                 bot_message = "மாதிரி ஏற்றப்படவில்லை. முதலில் 'மாதிரியை ஏற்று' பொத்தானைக் கிளிக் செய்யவும்."
- #             else:
- #                 # Generate bot response with parameters
- #                 bot_message = generate_response(
- #                     model_state,
- #                     tokenizer_state,
- #                     message,
- #                     history,  # history already in the correct format
- #                     temperature=temp,
- #                     top_p=tp,
- #                     top_k=tk
- #                 )

- #             # Return the bot's message to be added to history
- #             return bot_message

- #         # Create the chat interface
- #         chatbot = gr.Chatbot()
- #         msg = gr.Textbox(
- #             show_label=False,
- #             placeholder="உங்கள் செய்தி இங்கே தட்டச்சு செய்யவும் (Type your message here...)",
- #         )
- #         clear = gr.Button("அழி (Clear)")

- #         # Set up the chat interface
- #         msg.submit(
- #             chat_function,
- #             [msg, chatbot, model, tokenizer, temperature, top_p, top_k],
- #             [chatbot],
- #             queue=True,
- #         )
- #         clear.click(lambda: None, None, chatbot, queue=False)

- #         # Add examples
- #         examples = gr.Examples(
- #             examples=[
- #                 "வணக்கம், நீங்கள் யார்?",
- #                 "நான் பெரிய பணக்காரன் இல்லை, லேட்டஸ்ட் iPhone-இல் நிறைய பணம் செலவழிக்க வேண்டுமா?",
- #                 "பட்டியலை வரிசைப்படுத்த பைதான் செயல்பாட்டை எழுதவும்.",
- #                 "சிவப்பும் மஞ்சளும் கலந்தால் என்ன நிறமாக இருக்கும்?",
- #                 "விரைவாக தூங்குவது எப்படி?"
- #             ],
- #             inputs=msg,
- #         )

- #         # Connect the model loading button
- #         load_model_btn.click(
- #             load_model_fn,
- #             outputs=[model_status, model, tokenizer]
- #         )

- #         # Add like functionality
- #         chatbot.like(vote, None, None)

- #     return demo
-
- # # Create and launch the demo
- # demo = create_chatbot_interface()
-
- # # Launch the demo
- # if __name__ == "__main__":
-
-
- #     demo.queue(max_size=3).launch()
-
-
-
-
- from transformers import LlamaForCausalLM, AutoTokenizer, pipeline
-
- model = LlamaForCausalLM.from_pretrained(
-     "abhinand/tamil-llama-instruct-v0.2",
-     # load_in_8bit=True,  # Set this depending on the GPU you have
-     torch_dtype=torch.bfloat16,
-     device_map={"": 0},  # Set this depending on the number of GPUs you have
-     local_files_only=False  # Optional
- )
- model.eval()
-
- tokenizer = AutoTokenizer.from_pretrained("abhinand/tamil-llama-instruct-v0.2")
-
- inf_pipeline = pipeline("conversational", model=model, tokenizer=tokenizer)
-
-
- def format_instruction(system_prompt, question, return_dict=False):
-     if system_prompt is None:
-         messages = [
-             {'content': question, 'role': 'user'},
-         ]
-     else:
-         messages = [
-             {'content': system_prompt, 'role': 'system'},
-             {'content': question, 'role': 'user'},
-         ]

-     if return_dict:
-         return messages

-     prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

-     return prompt


- # Set the generation configuration according to your needs
- temperature = 0.6
- repetition_penalty = 1.1
- max_new_tokens = 256

- SYSTEM_PROMPT = "You are an AI assistant who follows instructions extremely well and reply only in tamil and also can understand tamil input. Do your best your best to help."
- INPUT = "Can you explain the significance of Tamil festival Pongal?"

- instruction = format_instruction(
-     system_prompt=SYSTEM_PROMPT,
-     question=INPUT,
-     return_dict=True,
- )

- output = inf_pipeline(
-     instruction,
-     temperature=temperature,
-     max_new_tokens=max_new_tokens,
-     repetition_penalty=repetition_penalty
- )
- print(output)
+ import os
+ import gradio as gr
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ import torch
+
+ # Define paths for storage - avoid persistent folder issues
+ MODEL_CACHE_DIR = "./model_cache"
+ HF_HOME_DIR = "./hf_home"
+ TRANSFORMERS_CACHE_DIR = "./transformers_cache"
+
+ # Set environment variables
+ os.environ["HF_HOME"] = HF_HOME_DIR
+ os.environ["TRANSFORMERS_CACHE"] = TRANSFORMERS_CACHE_DIR
+
+ # Create cache directories if they don't exist
+ os.makedirs(MODEL_CACHE_DIR, exist_ok=True)
+ os.makedirs(HF_HOME_DIR, exist_ok=True)
+ os.makedirs(TRANSFORMERS_CACHE_DIR, exist_ok=True)
+
+ # Initialize the model and tokenizer - only when explicitly requested
+ def initialize_model():
+     print("Loading model and tokenizer... This may take a few minutes.")

+     try:
+         # Load the tokenizer
+         tokenizer = AutoTokenizer.from_pretrained(
+             "abhinand/tamil-llama-7b-instruct-v0.2",
+             cache_dir=MODEL_CACHE_DIR
+         )
+
+         # CPU-friendly configuration
+         model = AutoModelForCausalLM.from_pretrained(
+             "abhinand/tamil-llama-7b-instruct-v0.2",
+             device_map="auto",
+             torch_dtype="auto",
+             low_cpu_mem_usage=True,
+             cache_dir=MODEL_CACHE_DIR
+         )
+
+         print("Model and tokenizer loaded successfully!")
+         return model, tokenizer
+     except Exception as e:
+         print(f"Error loading model: {e}")
+         return None, None
+
+ # Generate response
+ def generate_response(model, tokenizer, user_input, chat_history, temperature=0.2, top_p=1.0, top_k=40):
+     # Check if model and tokenizer are loaded
+     if model is None or tokenizer is None:
+         return "மாதிரி ஏற்றப்படவில்லை. 'மாதிரியை ஏற்று' பொத்தானைக் கிளிக் செய்யவும்."  # Model not loaded, please click 'Load Model' button

+     # System message for the Tamil LLaMA model
+     system_message = "You are a helpful assistant that provides accurate information in Tamil language."

+     # Create the prompt using the template from documentation
+     prompt_template = f"<|im_start|>system\n{system_message}<|im_end|>\n"

+     # Process conversation history - chat_history format is list of tuples [(user_msg, bot_msg), ...]
+     if chat_history:
+         for user_msg, bot_msg in chat_history:
+             if user_msg and bot_msg:  # Ensure both messages exist
+                 prompt_template += f"<|im_start|>user\n{user_msg}<|im_end|>\n"
+                 prompt_template += f"<|im_start|>assistant\n{bot_msg}<|im_end|>\n"

+     # Add the current user message
+     prompt_template += f"<|im_start|>user\n{user_input}<|im_end|>\n"
+     prompt_template += "<|im_start|>assistant\n"

+     try:
+         # Tokenize input
+         inputs = tokenizer(prompt_template, return_tensors="pt", padding=True)
+
+         # Generate response with user-specified parameters
+         with torch.no_grad():
+             output = model.generate(
+                 inputs["input_ids"],
+                 attention_mask=inputs["attention_mask"],
+                 max_new_tokens=256,
+                 do_sample=True,
+                 temperature=temperature,
+                 top_p=top_p,
+                 top_k=top_k,
+                 pad_token_id=tokenizer.eos_token_id,
+                 eos_token_id=tokenizer.encode("<|im_end|>", add_special_tokens=False)[0] if "<|im_end|>" in tokenizer.get_vocab() else tokenizer.eos_token_id
+             )
+
+         # Decode output
+         generated_text = tokenizer.decode(output[0], skip_special_tokens=False)
+
+         # Extract just the assistant's response
+         response_parts = generated_text.split("<|im_start|>assistant\n")
+         if len(response_parts) > 1:
+             assistant_response = response_parts[-1].split("<|im_end|>")[0].strip()
+         else:
+             # Fallback extraction
+             assistant_response = generated_text[len(prompt_template):].strip()
+             if "<|im_end|>" in assistant_response:
+                 assistant_response = assistant_response.split("<|im_end|>")[0].strip()

+         return assistant_response
+     except Exception as e:
+         print(f"Error generating response: {e}")
+         return f"பிழை ஏற்பட்டது. மீண்டும் முயற்சிக்கவும்."  # Error occurred, please try again
+
+ # Function to vote/like a response
+ def vote(data, vote_type, model_name):
+     # This is a placeholder for the voting functionality
+     print(f"Received {vote_type} for response: {data}")
+     return data
+
+ # Create the Gradio interface
+ def create_chatbot_interface():
+     with gr.Blocks(css="css/index.css") as demo:
+         title = "# தமிழ் உரையாடல் பொத்தான் (Tamil Chatbot)"
+         description = "Tamil LLaMA 7B Instruct model with user-controlled generation parameters."
+
+         gr.Markdown(title)
+         gr.Markdown(description)
+
+         # Model loading indicator
+         with gr.Row():
+             model_status = gr.Markdown("⚠️ மாதிரி ஏற்றப்படவில்லை (Model not loaded)")
+             load_model_btn = gr.Button("மாதிரியை ஏற்று (Load Model)")
+
+         # Model and tokenizer states
+         model = gr.State(None)
+         tokenizer = gr.State(None)
+
+         # Parameter sliders
+         with gr.Accordion("Generation Parameters", open=False):
+             temperature = gr.Slider(
+                 label="temperature",
+                 value=0.2,
+                 minimum=0.0,
+                 maximum=2.0,
+                 step=0.05,
+                 interactive=True,
+                 info="Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic."
+             )

+             top_p = gr.Slider(
+                 label="top_p",
+                 value=1.0,
+                 minimum=0.0,
+                 maximum=1.0,
+                 step=0.01,
+                 interactive=True,
+                 info="0.1 means only the tokens comprising the top 10% probability mass are considered. Suggest set to 1 and use temperature. 1 means 100% and will disable it"
+             )

+             top_k = gr.Slider(
+                 label="top_k",
+                 value=40,
+                 minimum=0,
+                 maximum=1000,
+                 step=1,
+                 interactive=True,
+                 info="limits candidate tokens to a fixed number after sorting by probability. Setting it higher than the vocabulary size deactivates this limit."
+             )
+
+         # Function to load model on button click
+         def load_model_fn():
+             m, t = initialize_model()
+             if m is not None and t is not None:
+                 return "✅ மாதிரி வெற்றிகரமாக ஏற்றப்பட்டது (Model loaded successfully)", m, t
+             else:
+                 return "❌ மாதிரி ஏற்றுவதில் பிழை (Error loading model)", None, None
+
+         # Function to respond to user messages - fixed to handle tuples format
+         def chat_function(message, history, model_state, tokenizer_state, temp, tp, tk):
+             # Check if model is loaded
+             if model_state is None:
+                 bot_message = "மாதிரி ஏற்றப்படவில்லை. முதலில் 'மாதிரியை ஏற்று' பொத்தானைக் கிளிக் செய்யவும்."
+             else:
+                 # Generate bot response with parameters
+                 bot_message = generate_response(
+                     model_state,
+                     tokenizer_state,
+                     message,
+                     history,  # history already in the correct format
+                     temperature=temp,
+                     top_p=tp,
+                     top_k=tk
+                 )

+             # Return the bot's message to be added to history
+             return bot_message

+         # Create the chat interface
+         chatbot = gr.Chatbot()
+         msg = gr.Textbox(
+             show_label=False,
+             placeholder="உங்கள் செய்தி இங்கே தட்டச்சு செய்யவும் (Type your message here...)",
+         )
+         clear = gr.Button("அழி (Clear)")

+         # Set up the chat interface
+         msg.submit(
+             chat_function,
+             [msg, chatbot, model, tokenizer, temperature, top_p, top_k],
+             [chatbot],
+             queue=True,
+         )
+         clear.click(lambda: None, None, chatbot, queue=False)

+         # Add examples
+         examples = gr.Examples(
+             examples=[
+                 "வணக்கம், நீங்கள் யார்?",
+                 "நான் பெரிய பணக்காரன் இல்லை, லேட்டஸ்ட் iPhone-இல் நிறைய பணம் செலவழிக்க வேண்டுமா?",
+                 "பட்டியலை வரிசைப்படுத்த பைதான் செயல்பாட்டை எழுதவும்.",
+                 "சிவப்பும் மஞ்சளும் கலந்தால் என்ன நிறமாக இருக்கும்?",
+                 "விரைவாக தூங்குவது எப்படி?"
+             ],
+             inputs=msg,
+         )

+         # Connect the model loading button
+         load_model_btn.click(
+             load_model_fn,
+             outputs=[model_status, model, tokenizer]
+         )

+         # Add like functionality
+         chatbot.like(vote, None, None)

+     return demo

+ # Create and launch the demo
+ demo = create_chatbot_interface()

+ # Launch the demo
+ if __name__ == "__main__":


+     demo.queue(max_size=3).launch()
+
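
A minimal, self-contained sketch of the ChatML-style prompt that the updated `generate_response()` assembles before calling `model.generate()`. The single-turn history and the Tamil strings below are illustrative placeholders, not part of the commit:

```python
# Illustrative sketch: mirrors the prompt-building logic in generate_response().
system_message = "You are a helpful assistant that provides accurate information in Tamil language."
chat_history = [("வணக்கம், நீங்கள் யார்?", "வணக்கம்! நான் ஒரு AI உதவியாளர்.")]  # hypothetical prior turn
user_input = "பொங்கல் பண்டிகையின் சிறப்பு என்ன?"  # hypothetical current question

prompt = f"<|im_start|>system\n{system_message}<|im_end|>\n"
for user_msg, bot_msg in chat_history:
    if user_msg and bot_msg:
        prompt += f"<|im_start|>user\n{user_msg}<|im_end|>\n"
        prompt += f"<|im_start|>assistant\n{bot_msg}<|im_end|>\n"
prompt += f"<|im_start|>user\n{user_input}<|im_end|>\n"
prompt += "<|im_start|>assistant\n"
print(prompt)
```

The model is asked to continue the text after the final assistant tag; generation stops at `<|im_end|>` when that token exists in the tokenizer vocabulary, which is what the `eos_token_id` fallback in the diff handles.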