Nithish3115 committed
Commit 377b4e3 · verified · 1 parent: 105046d

Update app.py

Files changed (1):
  1. app.py +217 -272
app.py CHANGED
@@ -1,296 +1,241 @@
- # import os
- # import gradio as gr
- # from transformers import AutoModelForCausalLM, AutoTokenizer
- # import torch
-
- # # Define paths for storage - avoid persistent folder issues
- # MODEL_CACHE_DIR = "./model_cache"
- # HF_HOME_DIR = "./hf_home"
- # TRANSFORMERS_CACHE_DIR = "./transformers_cache"
-
- # # Set environment variables
- # os.environ["HF_HOME"] = HF_HOME_DIR
- # os.environ["TRANSFORMERS_CACHE"] = TRANSFORMERS_CACHE_DIR
-
- # # Create cache directories if they don't exist
- # os.makedirs(MODEL_CACHE_DIR, exist_ok=True)
- # os.makedirs(HF_HOME_DIR, exist_ok=True)
- # os.makedirs(TRANSFORMERS_CACHE_DIR, exist_ok=True)
-
- # # Initialize the model and tokenizer - only when explicitly requested
- # def initialize_model():
- #     print("Loading model and tokenizer... This may take a few minutes.")

- #     try:
- #         # Load the tokenizer
- #         tokenizer = AutoTokenizer.from_pretrained(
- #             "abhinand/tamil-llama-7b-instruct-v0.2",
- #             cache_dir=MODEL_CACHE_DIR
- #         )
-
- #         # CPU-friendly configuration
- #         model = AutoModelForCausalLM.from_pretrained(
- #             "abhinand/tamil-llama-7b-instruct-v0.2",
- #             device_map="auto",
- #             torch_dtype="auto",
- #             low_cpu_mem_usage=True,
- #             cache_dir=MODEL_CACHE_DIR
- #         )
-
- #         print("Model and tokenizer loaded successfully!")
- #         return model, tokenizer
- #     except Exception as e:
- #         print(f"Error loading model: {e}")
- #         return None, None
-
- # # Generate response
- # def generate_response(model, tokenizer, user_input, chat_history, temperature=0.2, top_p=1.0, top_k=40):
- #     # Check if model and tokenizer are loaded
- #     if model is None or tokenizer is None:
- #         return "மாதிரி ஏற்றப்படவில்லை. 'மாதிரியை ஏற்று' பொத்தானைக் கிளிக் செய்யவும்."  # Model not loaded, please click 'Load Model' button

- #     # System message for the Tamil LLaMA model
- #     system_message = "You are a helpful assistant that provides accurate information in Tamil language."

- #     # Create the prompt using the template from documentation
- #     prompt_template = f"<|im_start|>system\n{system_message}<|im_end|>\n"

- #     # Process conversation history - chat_history format is list of tuples [(user_msg, bot_msg), ...]
- #     if chat_history:
- #         for user_msg, bot_msg in chat_history:
- #             if user_msg and bot_msg:  # Ensure both messages exist
- #                 prompt_template += f"<|im_start|>user\n{user_msg}<|im_end|>\n"
- #                 prompt_template += f"<|im_start|>assistant\n{bot_msg}<|im_end|>\n"

- #     # Add the current user message
- #     prompt_template += f"<|im_start|>user\n{user_input}<|im_end|>\n"
- #     prompt_template += "<|im_start|>assistant\n"

- #     try:
- #         # Tokenize input
- #         inputs = tokenizer(prompt_template, return_tensors="pt", padding=True)
-
- #         # Generate response with user-specified parameters
- #         with torch.no_grad():
- #             output = model.generate(
- #                 inputs["input_ids"],
- #                 attention_mask=inputs["attention_mask"],
- #                 max_new_tokens=256,
- #                 do_sample=True,
- #                 temperature=temperature,
- #                 top_p=top_p,
- #                 top_k=top_k,
- #                 pad_token_id=tokenizer.eos_token_id,
- #                 eos_token_id=tokenizer.encode("<|im_end|>", add_special_tokens=False)[0] if "<|im_end|>" in tokenizer.get_vocab() else tokenizer.eos_token_id
- #             )
-
- #         # Decode output
- #         generated_text = tokenizer.decode(output[0], skip_special_tokens=False)
-
- #         # Extract just the assistant's response
- #         response_parts = generated_text.split("<|im_start|>assistant\n")
- #         if len(response_parts) > 1:
- #             assistant_response = response_parts[-1].split("<|im_end|>")[0].strip()
- #         else:
- #             # Fallback extraction
- #             assistant_response = generated_text[len(prompt_template):].strip()
- #             if "<|im_end|>" in assistant_response:
- #                 assistant_response = assistant_response.split("<|im_end|>")[0].strip()

- #         return assistant_response
- #     except Exception as e:
- #         print(f"Error generating response: {e}")
- #         return f"பிழை ஏற்பட்டது. மீண்டும் முயற்சிக்கவும்."  # Error occurred, please try again
-
- # # Function to vote/like a response
- # def vote(data, vote_type, model_name):
- #     # This is a placeholder for the voting functionality
- #     print(f"Received {vote_type} for response: {data}")
- #     return data
-
- # # Create the Gradio interface
- # def create_chatbot_interface():
- #     with gr.Blocks(css="css/index.css") as demo:
- #         title = "# தமிழ் உரையாடல் பொத்தான் (Tamil Chatbot)"
- #         description = "Tamil LLaMA 7B Instruct model with user-controlled generation parameters."
-
- #         gr.Markdown(title)
- #         gr.Markdown(description)
-
- #         # Model loading indicator
- #         with gr.Row():
- #             model_status = gr.Markdown("⚠️ மாதிரி ஏற்றப்படவில்லை (Model not loaded)")
- #             load_model_btn = gr.Button("மாதிரியை ஏற்று (Load Model)")
-
- #         # Model and tokenizer states
- #         model = gr.State(None)
- #         tokenizer = gr.State(None)
-
- #         # Parameter sliders
- #         with gr.Accordion("Generation Parameters", open=False):
- #             temperature = gr.Slider(
- #                 label="temperature",
- #                 value=0.2,
- #                 minimum=0.0,
- #                 maximum=2.0,
- #                 step=0.05,
- #                 interactive=True,
- #                 info="Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic."
- #             )

- #             top_p = gr.Slider(
- #                 label="top_p",
- #                 value=1.0,
- #                 minimum=0.0,
- #                 maximum=1.0,
- #                 step=0.01,
- #                 interactive=True,
- #                 info="0.1 means only the tokens comprising the top 10% probability mass are considered. Suggest set to 1 and use temperature. 1 means 100% and will disable it"
- #             )

- #             top_k = gr.Slider(
- #                 label="top_k",
- #                 value=40,
- #                 minimum=0,
- #                 maximum=1000,
- #                 step=1,
- #                 interactive=True,
- #                 info="limits candidate tokens to a fixed number after sorting by probability. Setting it higher than the vocabulary size deactivates this limit."
- #             )
-
- #         # Function to load model on button click
- #         def load_model_fn():
- #             m, t = initialize_model()
- #             if m is not None and t is not None:
- #                 return "✅ மாதிரி வெற்றிகரமாக ஏற்றப்பட்டது (Model loaded successfully)", m, t
- #             else:
- #                 return "❌ மாதிரி ஏற்றுவதில் பிழை (Error loading model)", None, None
-
- #         # Function to respond to user messages - fixed to handle tuples format
- #         def chat_function(message, history, model_state, tokenizer_state, temp, tp, tk):
- #             # Check if model is loaded
- #             if model_state is None:
- #                 bot_message = "மாதிரி ஏற்றப்படவில்லை. முதலில் 'மாதிரியை ஏற்று' பொத்தானைக் கிளிக் செய்யவும்."
- #             else:
- #                 # Generate bot response with parameters
- #                 bot_message = generate_response(
- #                     model_state,
- #                     tokenizer_state,
- #                     message,
- #                     history,  # history already in the correct format
- #                     temperature=temp,
- #                     top_p=tp,
- #                     top_k=tk
- #                 )

- #             # Return the bot's message to be added to history
- #             return bot_message

- #         # Create the chat interface
- #         chatbot = gr.Chatbot()
- #         msg = gr.Textbox(
- #             show_label=False,
- #             placeholder="உங்கள் செய்தி இங்கே தட்டச்சு செய்யவும் (Type your message here...)",
- #         )
- #         clear = gr.Button("அழி (Clear)")

- #         # Set up the chat interface
- #         msg.submit(
- #             chat_function,
- #             [msg, chatbot, model, tokenizer, temperature, top_p, top_k],
- #             [chatbot],
- #             queue=True,
- #         )
- #         clear.click(lambda: None, None, chatbot, queue=False)

- #         # Add examples
- #         examples = gr.Examples(
- #             examples=[
- #                 "வணக்கம், நீங்கள் யார்?",
- #                 "நான் பெரிய பணக்காரன் இல்லை, லேட்டஸ்ட் iPhone-இல் நிறைய பணம் செலவழிக்க வேண்டுமா?",
- #                 "பட்டியலை வரிசைப்படுத்த பைதான் செயல்பாட்டை எழுதவும்.",
- #                 "சிவப்பும் மஞ்சளும் கலந்தால் என்ன நிறமாக இருக்கும்?",
- #                 "விரைவாக தூங்குவது எப்படி?"
- #             ],
- #             inputs=msg,
- #         )

- #         # Connect the model loading button
- #         load_model_btn.click(
- #             load_model_fn,
- #             outputs=[model_status, model, tokenizer]
- #         )

- #         # Add like functionality
- #         chatbot.like(vote, None, None)

- #     return demo
-
- # # Create and launch the demo
- # demo = create_chatbot_interface()
-
- # # Launch the demo
- # if __name__ == "__main__":
-
-
- #     demo.queue(max_size=3).launch()
-
-
-
-
- from transformers import LlamaForCausalLM, AutoTokenizer, pipeline
-
- model = LlamaForCausalLM.from_pretrained(
-     "abhinand/tamil-llama-instruct-v0.2",
-     # load_in_8bit=True,  # Set this depending on the GPU you have
-     torch_dtype=torch.bfloat16,
-     device_map={"": 0},  # Set this depending on the number of GPUs you have
-     local_files_only=False  # Optional
- )
- model.eval()
-
- tokenizer = AutoTokenizer.from_pretrained("abhinand/tamil-llama-instruct-v0.2")
-
- inf_pipeline = pipeline("conversational", model=model, tokenizer=tokenizer)
-
-
- def format_instruction(system_prompt, question, return_dict=False):
-     if system_prompt is None:
-         messages = [
-             {'content': question, 'role': 'user'},
-         ]
-     else:
-         messages = [
-             {'content': system_prompt, 'role': 'system'},
-             {'content': question, 'role': 'user'},
-         ]

-     if return_dict:
-         return messages

-     prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

-     return prompt


- # Set the generation configuration according to your needs
- temperature = 0.6
- repetition_penalty = 1.1
- max_new_tokens = 256

- SYSTEM_PROMPT = "You are an AI assistant who follows instructions extremely well and reply only in tamil and also can understand tamil input. Do your best your best to help."
- INPUT = "Can you explain the significance of Tamil festival Pongal?"

- instruction = format_instruction(
-     system_prompt=SYSTEM_PROMPT,
-     question=INPUT,
-     return_dict=True,
- )

- output = inf_pipeline(
-     instruction,
-     temperature=temperature,
-     max_new_tokens=max_new_tokens,
-     repetition_penalty=repetition_penalty
- )
- print(output)
+ import os
+ import gradio as gr
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ import torch
+
+ # Define paths for storage - avoid persistent folder issues
+ MODEL_CACHE_DIR = "./model_cache"
+ HF_HOME_DIR = "./hf_home"
+ TRANSFORMERS_CACHE_DIR = "./transformers_cache"
+
+ # Set environment variables
+ os.environ["HF_HOME"] = HF_HOME_DIR
+ os.environ["TRANSFORMERS_CACHE"] = TRANSFORMERS_CACHE_DIR
+
+ # Create cache directories if they don't exist
+ os.makedirs(MODEL_CACHE_DIR, exist_ok=True)
+ os.makedirs(HF_HOME_DIR, exist_ok=True)
+ os.makedirs(TRANSFORMERS_CACHE_DIR, exist_ok=True)
+
+ # Initialize the model and tokenizer - only when explicitly requested
+ def initialize_model():
+     print("Loading model and tokenizer... This may take a few minutes.")

+     try:
+         # Load the tokenizer
+         tokenizer = AutoTokenizer.from_pretrained(
+             "abhinand/tamil-llama-7b-instruct-v0.2",
+             cache_dir=MODEL_CACHE_DIR
+         )
+
+         # CPU-friendly configuration
+         model = AutoModelForCausalLM.from_pretrained(
+             "abhinand/tamil-llama-7b-instruct-v0.2",
+             device_map="auto",
+             torch_dtype="auto",
+             low_cpu_mem_usage=True,
+             cache_dir=MODEL_CACHE_DIR
+         )
+
+         print("Model and tokenizer loaded successfully!")
+         return model, tokenizer
+     except Exception as e:
+         print(f"Error loading model: {e}")
+         return None, None
+
+ # Generate response
+ def generate_response(model, tokenizer, user_input, chat_history, temperature=0.2, top_p=1.0, top_k=40):
+     # Check if model and tokenizer are loaded
+     if model is None or tokenizer is None:
+         return "மாதிரி ஏற்றப்படவில்லை. 'மாதிரியை ஏற்று' பொத்தானைக் கிளிக் செய்யவும்."  # Model not loaded, please click 'Load Model' button

+     # System message for the Tamil LLaMA model
+     system_message = "You are a helpful assistant that provides accurate information in Tamil language."

+     # Create the prompt using the template from documentation
+     prompt_template = f"<|im_start|>system\n{system_message}<|im_end|>\n"

+     # Process conversation history - chat_history format is list of tuples [(user_msg, bot_msg), ...]
+     if chat_history:
+         for user_msg, bot_msg in chat_history:
+             if user_msg and bot_msg:  # Ensure both messages exist
+                 prompt_template += f"<|im_start|>user\n{user_msg}<|im_end|>\n"
+                 prompt_template += f"<|im_start|>assistant\n{bot_msg}<|im_end|>\n"

+     # Add the current user message
+     prompt_template += f"<|im_start|>user\n{user_input}<|im_end|>\n"
+     prompt_template += "<|im_start|>assistant\n"

+     try:
+         # Tokenize input
+         inputs = tokenizer(prompt_template, return_tensors="pt", padding=True)
+
+         # Generate response with user-specified parameters
+         with torch.no_grad():
+             output = model.generate(
+                 inputs["input_ids"],
+                 attention_mask=inputs["attention_mask"],
+                 max_new_tokens=256,
+                 do_sample=True,
+                 temperature=temperature,
+                 top_p=top_p,
+                 top_k=top_k,
+                 pad_token_id=tokenizer.eos_token_id,
+                 eos_token_id=tokenizer.encode("<|im_end|>", add_special_tokens=False)[0] if "<|im_end|>" in tokenizer.get_vocab() else tokenizer.eos_token_id
+             )
+
+         # Decode output
+         generated_text = tokenizer.decode(output[0], skip_special_tokens=False)
+
+         # Extract just the assistant's response
+         response_parts = generated_text.split("<|im_start|>assistant\n")
+         if len(response_parts) > 1:
+             assistant_response = response_parts[-1].split("<|im_end|>")[0].strip()
+         else:
+             # Fallback extraction
+             assistant_response = generated_text[len(prompt_template):].strip()
+             if "<|im_end|>" in assistant_response:
+                 assistant_response = assistant_response.split("<|im_end|>")[0].strip()

+         return assistant_response
+     except Exception as e:
+         print(f"Error generating response: {e}")
+         return f"பிழை ஏற்பட்டது. மீண்டும் முயற்சிக்கவும்."  # Error occurred, please try again
+
+ # Function to vote/like a response
+ def vote(data, vote_type, model_name):
+     # This is a placeholder for the voting functionality
+     print(f"Received {vote_type} for response: {data}")
+     return data
+
+ # Create the Gradio interface
+ def create_chatbot_interface():
+     with gr.Blocks(css="css/index.css") as demo:
+         title = "# தமிழ் உரையாடல் பொத்தான் (Tamil Chatbot)"
+         description = "Tamil LLaMA 7B Instruct model with user-controlled generation parameters."
+
+         gr.Markdown(title)
+         gr.Markdown(description)
+
+         # Model loading indicator
+         with gr.Row():
+             model_status = gr.Markdown("⚠️ மாதிரி ஏற்றப்படவில்லை (Model not loaded)")
+             load_model_btn = gr.Button("மாதிரியை ஏற்று (Load Model)")
+
+         # Model and tokenizer states
+         model = gr.State(None)
+         tokenizer = gr.State(None)
+
+         # Parameter sliders
+         with gr.Accordion("Generation Parameters", open=False):
+             temperature = gr.Slider(
+                 label="temperature",
+                 value=0.2,
+                 minimum=0.0,
+                 maximum=2.0,
+                 step=0.05,
+                 interactive=True,
+                 info="Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic."
+             )

+             top_p = gr.Slider(
+                 label="top_p",
+                 value=1.0,
+                 minimum=0.0,
+                 maximum=1.0,
+                 step=0.01,
+                 interactive=True,
+                 info="0.1 means only the tokens comprising the top 10% probability mass are considered. Suggest set to 1 and use temperature. 1 means 100% and will disable it"
+             )

+             top_k = gr.Slider(
+                 label="top_k",
+                 value=40,
+                 minimum=0,
+                 maximum=1000,
+                 step=1,
+                 interactive=True,
+                 info="limits candidate tokens to a fixed number after sorting by probability. Setting it higher than the vocabulary size deactivates this limit."
+             )
+
+         # Function to load model on button click
+         def load_model_fn():
+             m, t = initialize_model()
+             if m is not None and t is not None:
+                 return "✅ மாதிரி வெற்றிகரமாக ஏற்றப்பட்டது (Model loaded successfully)", m, t
+             else:
+                 return "❌ மாதிரி ஏற்றுவதில் பிழை (Error loading model)", None, None
+
+         # Function to respond to user messages - fixed to handle tuples format
+         def chat_function(message, history, model_state, tokenizer_state, temp, tp, tk):
+             # Check if model is loaded
+             if model_state is None:
+                 bot_message = "மாதிரி ஏற்றப்படவில்லை. முதலில் 'மாதிரியை ஏற்று' பொத்தானைக் கிளிக் செய்யவும்."
+             else:
+                 # Generate bot response with parameters
+                 bot_message = generate_response(
+                     model_state,
+                     tokenizer_state,
+                     message,
+                     history,  # history already in the correct format
+                     temperature=temp,
+                     top_p=tp,
+                     top_k=tk
+                 )

+             # Return the bot's message to be added to history
+             return bot_message

+         # Create the chat interface
+         chatbot = gr.Chatbot()
+         msg = gr.Textbox(
+             show_label=False,
+             placeholder="உங்கள் செய்தி இங்கே தட்டச்சு செய்யவும் (Type your message here...)",
+         )
+         clear = gr.Button("அழி (Clear)")

+         # Set up the chat interface
+         msg.submit(
+             chat_function,
+             [msg, chatbot, model, tokenizer, temperature, top_p, top_k],
+             [chatbot],
+             queue=True,
+         )
+         clear.click(lambda: None, None, chatbot, queue=False)

+         # Add examples
+         examples = gr.Examples(
+             examples=[
+                 "வணக்கம், நீங்கள் யார்?",
+                 "நான் பெரிய பணக்காரன் இல்லை, லேட்டஸ்ட் iPhone-இல் நிறைய பணம் செலவழிக்க வேண்டுமா?",
+                 "பட்டியலை வரிசைப்படுத்த பைதான் செயல்பாட்டை எழுதவும்.",
+                 "சிவப்பும் மஞ்சளும் கலந்தால் என்ன நிறமாக இருக்கும்?",
+                 "விரைவாக தூங்குவது எப்படி?"
+             ],
+             inputs=msg,
+         )

+         # Connect the model loading button
+         load_model_btn.click(
+             load_model_fn,
+             outputs=[model_status, model, tokenizer]
+         )

+         # Add like functionality
+         chatbot.like(vote, None, None)

+     return demo

+ # Create and launch the demo
+ demo = create_chatbot_interface()

+ # Launch the demo
+ if __name__ == "__main__":


+     demo.queue(max_size=3).launch()
+
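
A minimal, self-contained sketch of the ChatML-style prompt that the updated `generate_response()` assembles before calling `model.generate()`. The single-turn history and the Tamil strings below are illustrative placeholders, not part of the commit:

```python
# Illustrative sketch: mirrors the prompt-building logic in generate_response().
system_message = "You are a helpful assistant that provides accurate information in Tamil language."
chat_history = [("வணக்கம், நீங்கள் யார்?", "வணக்கம்! நான் ஒரு AI உதவியாளர்.")]  # hypothetical prior turn
user_input = "பொங்கல் பண்டிகையின் சிறப்பு என்ன?"  # hypothetical current question

prompt = f"<|im_start|>system\n{system_message}<|im_end|>\n"
for user_msg, bot_msg in chat_history:
    if user_msg and bot_msg:
        prompt += f"<|im_start|>user\n{user_msg}<|im_end|>\n"
        prompt += f"<|im_start|>assistant\n{bot_msg}<|im_end|>\n"
prompt += f"<|im_start|>user\n{user_input}<|im_end|>\n"
prompt += "<|im_start|>assistant\n"
print(prompt)
```

The model is asked to continue the text after the final assistant tag; generation stops at `<|im_end|>` when that token exists in the tokenizer vocabulary, which is what the `eos_token_id` fallback in the diff handles.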