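# Gradio chat playground for a causal LM loaded with Hugging Face
# Transformers (intended to run as a Hugging Face Space).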
import os
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
from threading import Thread
import gradio as gr
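
# MODEL_ID and TOKEN are read from the environment (e.g. Space secrets);
# TOKEN is only needed when the checkpoint is gated or private.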
MODEL_NAME = os.getenv('MODEL_ID')
TOKEN = os.getenv('TOKEN')
| print("Loading model...") | |
| tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True, token=TOKEN) | |
| model = AutoModelForCausalLM.from_pretrained( | |
| MODEL_NAME, | |
| torch_dtype=torch.bfloat16, | |
| device_map="auto", | |
| trust_remote_code=True, | |
| token=TOKEN | |
| ) | |
| print("Model loaded.") | |
def playground(
    message,
    history,
    system_prompt,
    max_new_tokens,
    temperature,
    repetition_penalty,
    top_k,
    top_p
):
    if not isinstance(message, str) or not message.strip():
        yield ""
        return

    # Build the conversation, starting with the system prompt
    conversation = []

    # Add the system prompt if one was provided
    if system_prompt and system_prompt.strip():
        conversation.append({"role": "system", "content": system_prompt.strip()})

    # Add prior turns; Gradio passes history as (user, bot) pairs or as
    # {"role": ..., "content": ...} dicts depending on the Chatbot `type`,
    # so handle both defensively
    for turn in history:
        if isinstance(turn, dict):
            conversation.append({"role": turn["role"], "content": turn["content"]})
        else:
            user_msg, bot_msg = turn
            conversation.append({"role": "user", "content": user_msg})
            if bot_msg:
                conversation.append({"role": "assistant", "content": bot_msg})

    conversation.append({"role": "user", "content": message})
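
    # Prefer the tokenizer's built-in chat template when available; otherwise
    # fall back to a plain role-prefixed transcript.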
    if hasattr(tokenizer, "apply_chat_template"):
        prompt = tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
    else:
        prompt = "\n".join([f"{msg['role']}: {msg['content']}" for msg in conversation]) + "\nassistant:"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    generation_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=int(max_new_tokens),
        temperature=float(temperature),
        top_k=int(top_k) if top_k > 0 else None,
        top_p=float(top_p),
        repetition_penalty=float(repetition_penalty),
        do_sample=temperature > 0,
        pad_token_id=tokenizer.eos_token_id
    )
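
    # Run generate() on a worker thread so the streamer can be consumed here,
    # yielding the growing response to the UI as each chunk is decoded.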
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    generated_text = ""
    for new_text in streamer:
        generated_text += new_text
        yield generated_text
    thread.join()

with gr.Blocks(fill_height=True, fill_width=True) as app:
    with gr.Sidebar():
        gr.Markdown("## Playground by UltimaX Intelligence")
        gr.HTML("""
            Runs <b><a href="https://huggingface.co/beyoru/Qwen3-0.9B-A0.6B" target="_blank">
            beyoru/Qwen3-0.9B-A0.6B</a></b> via <b>Hugging Face Transformers</b>.<br><br>
            <b>Support me at:</b><br><br>
            <a href="https://www.buymeacoffee.com/ductransa0g" target="_blank">
            <img src="https://cdn.buymeacoffee.com/buttons/v2/default-yellow.png" alt="Buy Me A Coffee" width="150px">
            </a>
        """)
| gr.Markdown("---") | |
| gr.Markdown("## System Prompt") | |
| system_prompt = gr.Textbox( | |
| label="System Prompt", | |
| placeholder="Enter custom system instructions here (optional)...", | |
| lines=4, | |
| value="You are a helpful AI assistant.", | |
| info="AI role custome" | |
| ) | |
| gr.Markdown("---") | |
| gr.Markdown("## Generation Parameters") | |
| max_new_tokens = gr.Slider(32, 4096, value=2048, step=32, label="Max New Tokens") | |
| temperature = gr.Slider(0.1, 2.0, value=0.6, step=0.1, label="Temperature") | |
| repetition_penalty = gr.Slider(0.1, 2.0, value=1.0, step=0.1, label="Repetition Penalty") | |
| top_k = gr.Slider(0, 100, value=20, step=1, label="Top K (0 = off)") | |
| top_p = gr.Slider(0.0, 1.0, value=0.95, step=0.05, label="Top P") | |
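
    # Wire the sidebar controls into the handler; additional_inputs must match
    # the order of playground()'s extra parameters.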
    gr.ChatInterface(
        fn=playground,
        additional_inputs=[system_prompt, max_new_tokens, temperature, repetition_penalty, top_k, top_p],
        chatbot=gr.Chatbot(
            label="Qwen3-0.9B-A0.6B",
            show_copy_button=True,
            allow_tags=["think"],
        ),
        examples=[
            ["Hello, who are you?"],
            ["How do I solve 2x + 1 = 3?"],
            ["Example Python code for async"]
        ],
        cache_examples=False,
        show_api=False
    )
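
# Bind to all interfaces so the app is reachable from outside the container;
# pwa=True lets browsers install the playground as a progressive web app.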
app.launch(server_name="0.0.0.0", pwa=True)