tommytracx committed
Commit e062f4f · verified · 1 Parent(s): 71e67c8

Upload app (1).py

Files changed (1)
app (1).py +474 -0
app (1).py ADDED
@@ -0,0 +1,474 @@
+ # app.py
+ from flask import Flask, request, jsonify, Response, render_template_string
+ import os
+ import requests
+ import json
+ import logging
+ from logging.handlers import RotatingFileHandler
+ from typing import Dict, Any, List
+ import time
+ import socket
+
+ app = Flask(__name__)
+
+ # Configure logging with rotating file output
+ log_handler = RotatingFileHandler('/home/ollama/server.log', maxBytes=1000000, backupCount=5)
+ log_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
+ logging.getLogger().addHandler(log_handler)
+ logging.getLogger().setLevel(logging.INFO)
+
+ # Configuration (all overridable via environment variables)
+ OLLAMA_BASE_URL = os.getenv('OLLAMA_BASE_URL', 'http://localhost:11434')
+ ALLOWED_MODELS = os.getenv('ALLOWED_MODELS', 'llama2,llama2:13b,llama2:70b,codellama,neural-chat,gemma-3-270m').split(',')
+ MAX_TOKENS = int(os.getenv('MAX_TOKENS', '2048'))
+ TEMPERATURE = float(os.getenv('TEMPERATURE', '0.7'))
+
+ class OllamaManager:
+     def __init__(self, base_url: str):
+         self.base_url = base_url.rstrip('/')
+         self.available_models = ALLOWED_MODELS  # Initialize with allowed models
+         self.refresh_models()
+
+     def refresh_models(self) -> None:
+         """Refresh the list of available models from the local Ollama API, falling back to ALLOWED_MODELS."""
+         try:
+             response = requests.get(f"{self.base_url}/api/tags", timeout=10)
+             response.raise_for_status()
+             data = response.json()
+             models = [model['name'] for model in data.get('models', [])]
+             # Keep only models that are also in ALLOWED_MODELS
+             self.available_models = [model for model in models if model in ALLOWED_MODELS]
+             if not self.available_models:
+                 self.available_models = ALLOWED_MODELS
+                 logging.warning("No allowed models found in API response, using ALLOWED_MODELS")
+             logging.info(f"Available models: {self.available_models}")
+         except requests.exceptions.ConnectionError as e:
+             logging.error(f"Connection error while refreshing models: {e}")
+             self.available_models = ALLOWED_MODELS
+         except requests.exceptions.HTTPError as e:
+             logging.error(f"HTTP error while refreshing models: {e}")
+             self.available_models = ALLOWED_MODELS
+         except Exception as e:
+             logging.error(f"Unexpected error refreshing models: {e}")
+             self.available_models = ALLOWED_MODELS
+
+     def list_models(self) -> List[str]:
+         """Return the list of available models without refreshing."""
+         return self.available_models
+
+     def pull_model(self, model_name: str) -> Dict[str, Any]:
+         """Pull a model from Ollama."""
+         if model_name not in ALLOWED_MODELS:
+             logging.warning(f"Attempted to pull unauthorized model: {model_name}")
+             return {"status": "error", "message": f"Model {model_name} not in allowed list"}
+
+         try:
+             response = requests.post(f"{self.base_url}/api/pull", json={"name": model_name}, timeout=300)
+             response.raise_for_status()
+             self.refresh_models()  # Refresh models after pulling
+             logging.info(f"Successfully pulled model: {model_name}")
+             return {"status": "success", "model": model_name}
+         except requests.exceptions.ConnectionError as e:
+             logging.error(f"Connection error pulling model {model_name}: {e}")
+             return {"status": "error", "message": f"Connection error: {str(e)}"}
+         except requests.exceptions.HTTPError as e:
+             logging.error(f"HTTP error pulling model {model_name}: {e}")
+             return {"status": "error", "message": f"HTTP error: {str(e)}"}
+         except Exception as e:
+             logging.error(f"Unexpected error pulling model {model_name}: {e}")
+             return {"status": "error", "message": str(e)}
+
+     def generate(self, model_name: str, prompt: str, stream: bool = False, **kwargs) -> Any:
+         """Generate text using a model, with optional streaming."""
+         if model_name not in self.available_models:
+             logging.warning(f"Attempted to generate with unavailable model: {model_name}")
+             return {"status": "error", "message": f"Model {model_name} not available"}
+
+         try:
+             payload = {
+                 "model": model_name,
+                 "prompt": prompt,
+                 "stream": stream,
+                 **kwargs
+             }
+             if stream:
+                 # Return the raw Response so the caller can relay the chunked stream
+                 response = requests.post(f"{self.base_url}/api/generate", json=payload, stream=True, timeout=120)
+                 response.raise_for_status()
+                 return response
+             else:
+                 response = requests.post(f"{self.base_url}/api/generate", json=payload, timeout=120)
+                 response.raise_for_status()
+                 data = response.json()
+                 logging.info(f"Generated response with model {model_name}")
+                 return {
+                     "status": "success",
+                     "response": data.get('response', ''),
+                     "model": model_name,
+                     "usage": data.get('usage', {})
+                 }
+         except requests.exceptions.ConnectionError as e:
+             logging.error(f"Connection error generating response with model {model_name}: {e}")
+             return {"status": "error", "message": f"Connection error: {str(e)}"}
+         except requests.exceptions.HTTPError as e:
+             logging.error(f"HTTP error generating response with model {model_name}: {e}")
+             return {"status": "error", "message": f"HTTP error: {str(e)}"}
+         except Exception as e:
+             logging.error(f"Unexpected error generating response with model {model_name}: {e}")
+             return {"status": "error", "message": str(e)}
+
+ # Initialize Ollama manager
+ ollama_manager = OllamaManager(OLLAMA_BASE_URL)
+
+ # HTML template for the home page with modernized UI
+ HTML_TEMPLATE = '''
+ <!DOCTYPE html>
+ <html lang="en">
+ <head>
+     <meta charset="UTF-8">
+     <meta name="viewport" content="width=device-width, initial-scale=1.0">
+     <title>Ollama API Space</title>
+     <style>
+         :root {
+             --primary-color: #667eea;
+             --secondary-color: #764ba2;
+             --text-color: #333;
+             --bg-color: #fafbfc;
+             --border-color: #e9ecef;
+             --input-bg: white;
+         }
+         .dark-mode {
+             --primary-color: #3b4a8c;
+             --secondary-color: #4a2e6b;
+             --text-color: #f0f0f0;
+             --bg-color: #1a1a1a;
+             --border-color: #4a4a4a;
+             --input-bg: #3a3a3a;
+         }
+         * {
+             margin: 0;
+             padding: 0;
+             box-sizing: border-box;
+         }
+         body {
+             font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
+             background: linear-gradient(135deg, var(--primary-color) 0%, var(--secondary-color) 100%);
+             color: var(--text-color);
+             min-height: 100vh;
+             padding: 20px;
+         }
+         .container {
+             max-width: 900px;
+             margin: 0 auto;
+             background: var(--bg-color);
+             border-radius: 20px;
+             box-shadow: 0 20px 40px rgba(0,0,0,0.1);
+             padding: 30px;
+             position: relative;
+         }
+         .theme-toggle {
+             position: absolute;
+             top: 15px;
+             right: 15px;
+             background: none;
+             border: none;
+             cursor: pointer;
+             font-size: 1.5rem;
+             color: var(--text-color);
+             transition: transform 0.2s;
+         }
+         .theme-toggle:hover {
+             transform: scale(1.1);
+         }
+         h1 {
+             font-size: 2.8rem;
+             margin-bottom: 20px;
+             text-align: center;
+             font-weight: 700;
+         }
+         p {
+             font-size: 1.2rem;
+             line-height: 1.6;
+             margin-bottom: 20px;
+         }
+         h2 {
+             font-size: 1.8rem;
+             margin-top: 30px;
+             margin-bottom: 15px;
+         }
+         .endpoint {
+             background: var(--border-color);
+             padding: 20px;
+             margin: 15px 0;
+             border-radius: 10px;
+             transition: transform 0.2s;
+         }
+         .endpoint:hover {
+             transform: translateY(-2px);
+         }
+         .method {
+             background: var(--primary-color);
+             color: white;
+             padding: 5px 12px;
+             border-radius: 5px;
+             font-size: 14px;
+             margin-right: 10px;
+         }
+         .url {
+             font-family: monospace;
+             background: var(--input-bg);
+             padding: 5px 10px;
+             border-radius: 5px;
+             color: var(--text-color);
+         }
+         pre {
+             background: var(--border-color);
+             padding: 20px;
+             border-radius: 10px;
+             overflow-x: auto;
+             font-size: 14px;
+         }
+         code {
+             font-family: monospace;
+             font-size: 14px;
+         }
+         .dark-mode pre, .dark-mode .endpoint {
+             background: #2a2a2a;
+         }
+         .status {
+             text-align: center;
+             padding: 10px;
+             font-size: 14px;
+             color: #6c757d;
+         }
+         .status.error {
+             color: #dc3545;
+         }
+         .status.success {
+             color: #28a745;
+         }
+         @media (max-width: 768px) {
+             .container {
+                 padding: 20px;
+             }
+             h1 {
+                 font-size: 2.2rem;
+             }
+             h2 {
+                 font-size: 1.5rem;
+             }
+         }
+     </style>
+ </head>
+ <body>
+     <div class="container">
+         <button class="theme-toggle" id="theme-toggle">🌙</button>
+         <h1>🚀 Ollama API Space</h1>
+         <p>This Space provides a robust API for managing and interacting with Ollama models, optimized for integration with OpenWebUI and other clients. It supports model listing, pulling, and text generation with streaming capabilities.</p>
+
+         <h2>Available Endpoints</h2>
+
+         <div class="endpoint">
+             <span class="method">GET</span> <span class="url">/api/models</span>
+             <p>List all available models filtered by ALLOWED_MODELS ({{ allowed_models }}).</p>
+             <p><strong>Response:</strong> <code>{"status": "success", "models": [...], "count": N}</code></p>
+         </div>
+
+         <div class="endpoint">
+             <span class="method">POST</span> <span class="url">/api/models/pull</span>
+             <p>Pull a model from Ollama, restricted to ALLOWED_MODELS.</p>
+             <p><strong>Body:</strong> <code>{"name": "model_name"}</code></p>
+             <p><strong>Response:</strong> <code>{"status": "success", "model": "model_name"}</code></p>
+         </div>
+
+         <div class="endpoint">
+             <span class="method">POST</span> <span class="url">/api/generate</span>
+             <p>Generate text using a model, with optional streaming.</p>
+             <p><strong>Body:</strong> <code>{"model": "model_name", "prompt": "your prompt", "stream": boolean, "temperature": float, "max_tokens": int}</code></p>
+             <p><strong>Response (non-streaming):</strong> <code>{"status": "success", "response": "...", "model": "...", "usage": {...}}</code></p>
+             <p><strong>Response (streaming):</strong> Stream of JSON objects: <code>{"response": "..."}</code></p>
+         </div>
+
+         <div class="endpoint">
+             <span class="method">GET</span> <span class="url">/health</span>
+             <p>Health check endpoint for the API and Ollama connection.</p>
+             <p><strong>Response:</strong> <code>{"status": "healthy", "ollama_connection": "connected", "available_models": N, "timestamp": N}</code></p>
+         </div>
+
+         <h2>Usage Examples</h2>
+         <p>Use this API with OpenWebUI or any REST client. Ensure models are in ALLOWED_MODELS: {{ allowed_models }}.</p>
+
+         <h3>cURL Examples</h3>
+         <pre>
+ # List models
+ curl {{ api_base_url }}/api/models
+
+ # Pull a model
+ curl -X POST {{ api_base_url }}/api/models/pull \
+     -H "Content-Type: application/json" \
+     -d '{"name": "gemma-3-270m"}'
+
+ # Generate text (non-streaming)
+ curl -X POST {{ api_base_url }}/api/generate \
+     -H "Content-Type: application/json" \
+     -d '{"model": "gemma-3-270m", "prompt": "Write a Python script", "temperature": 0.7, "max_tokens": 2048}'
+
+ # Generate text (streaming)
+ curl -X POST {{ api_base_url }}/api/generate \
+     -H "Content-Type: application/json" \
+     -d '{"model": "gemma-3-270m", "prompt": "Write a Python script", "stream": true}'
+ </pre>
+
+         <div class="status" id="status"></div>
+     </div>
+
+     <script>
+         document.addEventListener('DOMContentLoaded', async function() {
+             const themeToggle = document.getElementById('theme-toggle');
+             themeToggle.addEventListener('click', function() {
+                 document.body.classList.toggle('dark-mode');
+                 themeToggle.textContent = document.body.classList.contains('dark-mode') ? '☀️' : '🌙';
+                 localStorage.setItem('theme', document.body.classList.contains('dark-mode') ? 'dark' : 'light');
+             });
+             if (localStorage.getItem('theme') === 'dark') {
+                 document.body.classList.add('dark-mode');
+                 themeToggle.textContent = '☀️';
+             }
+
+             // Fetch and display available models
+             try {
+                 const response = await fetch('/api/models');
+                 const data = await response.json();
+                 const statusDiv = document.getElementById('status');
+                 if (data.status === 'success') {
+                     statusDiv.textContent = `Available models: ${data.models.join(', ')}`;
+                     statusDiv.className = 'status success';
+                 } else {
+                     statusDiv.textContent = `Error loading models: ${data.message}`;
+                     statusDiv.className = 'status error';
+                 }
+             } catch (error) {
+                 const statusDiv = document.getElementById('status');
+                 statusDiv.textContent = `Error loading models: ${error.message}`;
+                 statusDiv.className = 'status error';
+             }
+         });
+     </script>
+ </body>
+ </html>
+ '''
+
+ @app.route('/')
+ def home():
+     """Home page with API documentation."""
+     # Render the example snippets against this API's own host URL rather than
+     # the internal Ollama backend, which does not expose these routes.
+     return render_template_string(
+         HTML_TEMPLATE,
+         api_base_url=request.host_url.rstrip('/'),
+         allowed_models=', '.join(ALLOWED_MODELS)
+     )
+
+ @app.route('/api/models', methods=['GET'])
+ def list_models():
+     """List all available models."""
+     try:
+         models = ollama_manager.list_models()
+         logging.info(f"Returning models: {models}")
+         return jsonify({
+             "status": "success",
+             "models": models,
+             "count": len(models)
+         })
+     except Exception as e:
+         logging.error(f"Models endpoint error: {e}")
+         return jsonify({"status": "error", "message": str(e)}), 500
+
+ @app.route('/api/models/pull', methods=['POST'])
+ def pull_model():
+     """Pull a model from Ollama."""
+     try:
+         data = request.get_json()
+         if not data or 'name' not in data:
+             logging.warning("Model pull request missing 'name' field")
+             return jsonify({"status": "error", "message": "Model name is required"}), 400
+
+         model_name = data['name']
+         if model_name not in ALLOWED_MODELS:
+             logging.warning(f"Attempted to pull unauthorized model: {model_name}")
+             return jsonify({"status": "error", "message": f"Model {model_name} not in allowed list"}), 400
+
+         result = ollama_manager.pull_model(model_name)
+         return jsonify(result), 200 if result["status"] == "success" else 500
+     except Exception as e:
+         logging.error(f"Pull model endpoint error: {e}")
+         return jsonify({"status": "error", "message": str(e)}), 500
+
+ @app.route('/api/generate', methods=['POST'])
+ def generate_text():
+     """Generate text using a model, with optional streaming."""
+     try:
+         data = request.get_json()
+         if not data or 'model' not in data or 'prompt' not in data:
+             logging.warning("Generate request missing 'model' or 'prompt' field")
+             return jsonify({"status": "error", "message": "Model name and prompt are required"}), 400
+
+         model_name = data['model']
+         prompt = data['prompt']
+         stream = data.get('stream', False)
+         # Forward any remaining options (e.g. temperature, max_tokens) untouched
+         kwargs = {k: v for k, v in data.items() if k not in ['model', 'prompt', 'stream']}
+
+         result = ollama_manager.generate(model_name, prompt, stream=stream, **kwargs)
+
+         if stream and isinstance(result, requests.Response):
+             def generate_stream():
+                 try:
+                     for chunk in result.iter_content(chunk_size=None):
+                         yield chunk
+                 except Exception as e:
+                     logging.error(f"Streaming error: {e}")
+                     yield json.dumps({"status": "error", "message": str(e)}).encode()
+             return Response(generate_stream(), content_type='application/json')
+         else:
+             return jsonify(result), 200 if result["status"] == "success" else 500
+     except Exception as e:
+         logging.error(f"Generate endpoint error: {e}")
+         return jsonify({"status": "error", "message": str(e)}), 500
+
+ @app.route('/health', methods=['GET'])
+ def health_check():
+     """Health check endpoint."""
+     try:
+         response = requests.get(f"{OLLAMA_BASE_URL}/api/tags", timeout=5)
+         response.raise_for_status()
+         logging.info("Health check successful")
+         return jsonify({
+             "status": "healthy",
+             "ollama_connection": "connected",
+             "available_models": len(ollama_manager.available_models),
+             "timestamp": time.time(),
+             "hostname": socket.gethostname()
+         })
+     except requests.exceptions.ConnectionError as e:
+         logging.error(f"Health check connection error: {e}")
+         return jsonify({
+             "status": "unhealthy",
+             "ollama_connection": "failed",
+             "error": f"Connection error: {str(e)}",
+             "timestamp": time.time(),
+             "hostname": socket.gethostname()
+         }), 503
+     except requests.exceptions.HTTPError as e:
+         logging.error(f"Health check HTTP error: {e}")
+         return jsonify({
+             "status": "unhealthy",
+             "ollama_connection": "failed",
+             "error": f"HTTP error: {str(e)}",
+             "timestamp": time.time(),
+             "hostname": socket.gethostname()
+         }), 503
+     except Exception as e:
+         logging.error(f"Health check unexpected error: {e}")
+         return jsonify({
+             "status": "unhealthy",
+             "ollama_connection": "failed",
+             "error": str(e),
+             "timestamp": time.time(),
+             "hostname": socket.gethostname()
+         }), 503
+
+ if __name__ == '__main__':
+     app.run(host='0.0.0.0', port=7860, debug=False)
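
Below is a minimal client sketch for exercising the endpoints this file defines. It assumes a local run on port 7860 (the port app.run binds); the BASE_URL value, the prompt text, and the client_example.py name are illustrative assumptions, not part of the commit. The model name gemma-3-270m comes from the default ALLOWED_MODELS list above.

# client_example.py -- hypothetical client for the API above
import json
import requests

BASE_URL = "http://localhost:7860"  # assumption: Space running locally

# List the models the proxy is willing to serve
print(requests.get(f"{BASE_URL}/api/models", timeout=10).json())

# Non-streaming generation: one JSON document in the response body
resp = requests.post(
    f"{BASE_URL}/api/generate",
    json={"model": "gemma-3-270m", "prompt": "Write a haiku about servers."},
    timeout=120,
).json()
print(resp.get("response", resp))

# Streaming generation: the endpoint relays Ollama's newline-delimited
# JSON chunks, each carrying a partial "response" field
with requests.post(
    f"{BASE_URL}/api/generate",
    json={"model": "gemma-3-270m", "prompt": "Write a haiku about servers.", "stream": True},
    stream=True,
    timeout=120,
) as stream:
    for line in stream.iter_lines():
        if line:
            print(json.loads(line).get("response", ""), end="", flush=True)

Because the streaming branch forwards Ollama's chunks verbatim, any client that can parse newline-delimited JSON should be able to consume /api/generate directly.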