Spaces: Sleeping
sync
Browse files
app.py
CHANGED
|
@@ -15,7 +15,7 @@ def chat_with_moi(message, history, temperature, top_p, beta):
|
|
| 15 |
# launch_vllm_server(beta=beta)
|
| 16 |
|
| 17 |
payload = {
|
| 18 |
-
"model": "Qwen/
|
| 19 |
"messages": [{"role": "user", "content": message}],
|
| 20 |
"temperature": temperature,
|
| 21 |
"top_p": top_p,
|
|
|
|
| 15 |
# launch_vllm_server(beta=beta)
|
| 16 |
|
| 17 |
payload = {
|
| 18 |
+
"model": "Qwen/Qwen3-4B", # match what your vLLM server expects
|
| 19 |
"messages": [{"role": "user", "content": message}],
|
| 20 |
"temperature": temperature,
|
| 21 |
"top_p": top_p,
|
server.py
CHANGED
|
@@ -8,7 +8,7 @@ def setup_mixinputs():
|
|
| 8 |
# Step 1: Run mixinputs setup
|
| 9 |
subprocess.run(["mixinputs", "setup"], check=True)
|
| 10 |
|
| 11 |
-
@spaces.GPU(duration=240)
|
| 12 |
def launch_vllm_server(beta=1.0):
|
| 13 |
# Step 2: Set environment variables
|
| 14 |
env = os.environ.copy()
|
|
@@ -18,7 +18,7 @@ def launch_vllm_server(beta=1.0):
|
|
| 18 |
# Step 3: Launch vLLM with custom options
|
| 19 |
cmd = [
|
| 20 |
"vllm", "serve",
|
| 21 |
-
"Qwen/
|
| 22 |
"--tensor-parallel-size", "1",
|
| 23 |
"--enforce-eager",
|
| 24 |
"--max-seq-len-to-capture", "2048",
|
|
|
|
| 8 |
# Step 1: Run mixinputs setup
|
| 9 |
subprocess.run(["mixinputs", "setup"], check=True)
|
| 10 |
|
| 11 |
+
# @spaces.GPU(duration=240)
|
| 12 |
def launch_vllm_server(beta=1.0):
|
| 13 |
# Step 2: Set environment variables
|
| 14 |
env = os.environ.copy()
|
|
|
|
| 18 |
# Step 3: Launch vLLM with custom options
|
| 19 |
cmd = [
|
| 20 |
"vllm", "serve",
|
| 21 |
+
"Qwen/Qwen3-4B",
|
| 22 |
"--tensor-parallel-size", "1",
|
| 23 |
"--enforce-eager",
|
| 24 |
"--max-seq-len-to-capture", "2048",
|