| import gradio as gr |
| import spaces |
| import torch |
| from transformers import pipeline |
|
|
# Hugging Face Hub id of the checkpoint this Space serves.
MODEL_ID = "HauhauCS/Qwen3.5-9B-Uncensored-HauhauCS-Aggressive"


# Lazily-initialised text-generation pipeline: left as None at import time
# and populated by load_model() on the first request.
pipe = None
|
|
def load_model():
    """Create the global text-generation pipeline on first use.

    Idempotent: after the first call, ``pipe`` is set and subsequent
    calls return immediately, so it is cheap to invoke per request.
    """
    global pipe
    if pipe is None:
        pipe = pipeline(
            "text-generation",
            model=MODEL_ID,
            # Load in the checkpoint's native precision (bf16/fp16) rather
            # than the fp32 default — roughly halves memory for a 9B model.
            torch_dtype="auto",
            device_map="auto",
        )
|
|
@spaces.GPU
def chat_fn(message, history):
    """Generate one assistant reply for gr.ChatInterface.

    Args:
        message: The user's latest message (str).
        history: Prior turns from ChatInterface — either a list of
            (user, assistant) pairs or a list of openai-style dicts,
            depending on the interface's ``type=`` setting.

    Returns:
        The model's reply as a string.
    """
    load_model()

    # Rebuild the full conversation: the previous implementation dropped
    # `history` and sent the bare string, so the model never saw prior
    # turns and the chat template was never applied.
    messages = []
    for turn in history:
        if isinstance(turn, dict):
            messages.append({"role": turn["role"], "content": turn["content"]})
        else:
            user_msg, bot_msg = turn
            if user_msg:
                messages.append({"role": "user", "content": user_msg})
            if bot_msg:
                messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})

    # Passing a messages list makes the pipeline apply the model's chat
    # template before generation.
    outputs = pipe(
        messages,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        return_full_text=False,
    )

    reply = outputs[0]["generated_text"]
    # In chat mode some transformers versions return the conversation as a
    # list of message dicts; extract the final assistant message if so.
    if isinstance(reply, list):
        reply = reply[-1]["content"]
    return reply
|
|
# Wire the generation handler into Gradio's stock chat UI.
demo = gr.ChatInterface(chat_fn)


if __name__ == "__main__":
    demo.launch()