tommytracx committed on
Commit 28c2439 · verified · 1 Parent(s): a46c1ac

Update app.py

Files changed (1)
  app.py +476 -160
app.py CHANGED
@@ -1,149 +1,513 @@
- from flask import Flask, request, jsonify
  import os
- import subprocess
  import json
  import logging
  from typing import Dict, Any, List
- import requests

  app = Flask(__name__)
- logging.basicConfig(level=logging.INFO)

  # Configuration
  OLLAMA_BASE_URL = os.getenv('OLLAMA_BASE_URL', 'http://localhost:11434')
- MODELS_DIR = os.getenv('MODELS_DIR', '/models')
  ALLOWED_MODELS = os.getenv('ALLOWED_MODELS', 'llama2,llama2:13b,llama2:70b,codellama,neural-chat,gemma-3-270m').split(',')

  class OllamaManager:
      def __init__(self, base_url: str):
-         self.base_url = base_url
-         self.available_models = []
          self.refresh_models()

-     def refresh_models(self):
-         """Refresh the list of available models"""
          try:
              response = requests.get(f"{self.base_url}/api/tags", timeout=10)
-             if response.status_code == 200:
-                 data = response.json()
-                 self.available_models = [model['name'] for model in data.get('models', [])]
-             else:
-                 self.available_models = []
          except Exception as e:
              logging.error(f"Error refreshing models: {e}")
-             self.available_models = []

      def list_models(self) -> List[str]:
-         """List all available models"""
-         self.refresh_models()
          return self.available_models

-     def pull_model(self, model_name: str) -> Dict[str, Any]:
-         """Pull a model from Ollama"""
-         try:
-             response = requests.post(f"{self.base_url}/api/pull",
-                                      json={"name": model_name},
-                                      timeout=300)
-             if response.status_code == 200:
-                 return {"status": "success", "model": model_name}
-             else:
-                 return {"status": "error", "message": f"Failed to pull model: {response.text}"}
-         except Exception as e:
-             return {"status": "error", "message": str(e)}
-
      def generate(self, model_name: str, prompt: str, **kwargs) -> Dict[str, Any]:
-         """Generate text using a model"""
          try:
              payload = {
                  "model": model_name,
                  "prompt": prompt,
-                 "stream": False
              }
-             payload.update(kwargs)
-
-             response = requests.post(f"{self.base_url}/api/generate",
-                                      json=payload,
-                                      timeout=120)
-
-             if response.status_code == 200:
-                 data = response.json()
-                 return {
-                     "status": "success",
-                     "response": data.get('response', ''),
-                     "model": model_name,
-                     "usage": data.get('usage', {})
-                 }
-             else:
-                 return {"status": "error", "message": f"Generation failed: {response.text}"}
          except Exception as e:
              return {"status": "error", "message": str(e)}

  # Initialize Ollama manager
  ollama_manager = OllamaManager(OLLAMA_BASE_URL)

- @app.route('/')
- def home():
-     """Home page with API documentation"""
-     return '''
-     <!DOCTYPE html>
-     <html>
-     <head>
-         <title>Ollama API Space</title>
-         <style>
-             body { font-family: Arial, sans-serif; max-width: 800px; margin: 0 auto; padding: 20px; }
-             .endpoint { background: #f5f5f5; padding: 15px; margin: 10px 0; border-radius: 5px; }
-             .method { background: #007bff; color: white; padding: 2px 8px; border-radius: 3px; font-size: 12px; }
-             .url { font-family: monospace; background: #e9ecef; padding: 2px 6px; border-radius: 3px; }
-         </style>
-     </head>
-     <body>
-         <h1>🚀 Ollama API Space</h1>
-         <p>This Space provides API endpoints for Ollama model management and inference.</p>
-
-         <h2>Available Endpoints</h2>
-
-         <div class="endpoint">
-             <span class="method">GET</span> <span class="url">/api/models</span>
-             <p>List all available models</p>
          </div>

-         <div class="endpoint">
-             <span class="method">POST</span> <span class="url">/api/models/pull</span>
-             <p>Pull a model from Ollama</p>
-             <p>Body: {"name": "model_name"}</p>
          </div>

-         <div class="endpoint">
-             <span class="method">POST</span> <span class="url">/api/generate</span>
-             <p>Generate text using a model</p>
-             <p>Body: {"model": "model_name", "prompt": "your prompt"}</p>
          </div>

-         <div class="endpoint">
-             <span class="method">GET</span> <span class="url">/health</span>
-             <p>Health check endpoint</p>
          </div>

-         <h2>Usage Examples</h2>
-         <p>You can use this API with OpenWebUI or any other client that supports REST APIs.</p>

-         <h3>cURL Examples</h3>
-         <pre>
- # List models
- curl https://your-space-url.hf.space/api/models

- # Generate text
- curl -X POST https://your-space-url.hf.space/api/generate \
-   -H "Content-Type: application/json" \
-   -d '{"model": "llama2", "prompt": "Hello, how are you?"}'
-         </pre>
-     </body>
-     </html>
-     '''

  @app.route('/api/models', methods=['GET'])
- def list_models():
-     """List all available models"""
      try:
          models = ollama_manager.list_models()
          return jsonify({
@@ -152,74 +516,26 @@ def list_models():
              "count": len(models)
          })
      except Exception as e:
-         return jsonify({"status": "error", "message": str(e)}), 500
-
- @app.route('/api/models/pull', methods=['POST'])
- def pull_model():
-     """Pull a model from Ollama"""
-     try:
-         data = request.get_json()
-         if not data or 'name' not in data:
-             return jsonify({"status": "error", "message": "Model name is required"}), 400
-
-         model_name = data['name']
-         if model_name not in ALLOWED_MODELS:
-             return jsonify({"status": "error", "message": f"Model {model_name} not in allowed list"}), 400
-
-         result = ollama_manager.pull_model(model_name)
-         if result["status"] == "success":
-             return jsonify(result), 200
-         else:
-             return jsonify(result), 500
-     except Exception as e:
-         return jsonify({"status": "error", "message": str(e)}), 500
-
- @app.route('/api/generate', methods=['POST'])
- def generate_text():
-     """Generate text using a model"""
-     try:
-         data = request.get_json()
-         if not data or 'model' not in data or 'prompt' not in data:
-             return jsonify({"status": "error", "message": "Model name and prompt are required"}), 400
-
-         model_name = data['model']
-         prompt = data['prompt']
-
-         # Remove additional parameters that might be passed
-         kwargs = {k: v for k, v in data.items() if k not in ['model', 'prompt']}
-
-         result = ollama_manager.generate(model_name, prompt, **kwargs)
-         if result["status"] == "success":
-             return jsonify(result), 200
-         else:
-             return jsonify(result), 500
-     except Exception as e:
          return jsonify({"status": "error", "message": str(e)}), 500

  @app.route('/health', methods=['GET'])
  def health_check():
-     """Health check endpoint"""
      try:
-         # Try to connect to Ollama
-         response = requests.get(f"{OLLAMA_BASE_URL}/api/tags", timeout=5)
-         if response.status_code == 200:
-             return jsonify({
-                 "status": "healthy",
-                 "ollama_connection": "connected",
-                 "available_models": len(ollama_manager.available_models)
-             })
-         else:
-             return jsonify({
-                 "status": "unhealthy",
-                 "ollama_connection": "failed",
-                 "error": f"Ollama returned status {response.status_code}"
-             }), 503
      except Exception as e:
          return jsonify({
              "status": "unhealthy",
-             "ollama_connection": "failed",
-             "error": str(e)
-         }), 503

  if __name__ == '__main__':
-     app.run(host='0.0.0.0', port=7860, debug=False)

+ # app.py
+ from flask import Flask, request, jsonify, render_template_string
  import os
+ import requests
  import json
  import logging
  from typing import Dict, Any, List
+ import time

  app = Flask(__name__)
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

  # Configuration
  OLLAMA_BASE_URL = os.getenv('OLLAMA_BASE_URL', 'http://localhost:11434')
  ALLOWED_MODELS = os.getenv('ALLOWED_MODELS', 'llama2,llama2:13b,llama2:70b,codellama,neural-chat,gemma-3-270m').split(',')
+ MAX_TOKENS = int(os.getenv('MAX_TOKENS', '2048'))
+ TEMPERATURE = float(os.getenv('TEMPERATURE', '0.7'))

  class OllamaManager:
      def __init__(self, base_url: str):
+         self.base_url = base_url.rstrip('/')
+         self.available_models = ALLOWED_MODELS  # Initialize with allowed models
          self.refresh_models()

+     def refresh_models(self) -> None:
+         """Refresh the list of available models from Ollama API, falling back to allowed models."""
          try:
              response = requests.get(f"{self.base_url}/api/tags", timeout=10)
+             response.raise_for_status()
+             data = response.json()
+             models = [model['name'] for model in data.get('models', [])]
+             # Filter models to only include those in ALLOWED_MODELS
+             self.available_models = [model for model in models if model in ALLOWED_MODELS]
+             if not self.available_models:
+                 self.available_models = ALLOWED_MODELS
+                 logging.warning("No allowed models found in API response, using ALLOWED_MODELS")
+             logging.info(f"Available models: {self.available_models}")
          except Exception as e:
              logging.error(f"Error refreshing models: {e}")
+             self.available_models = ALLOWED_MODELS

      def list_models(self) -> List[str]:
+         """Return the list of available models."""
          return self.available_models

      def generate(self, model_name: str, prompt: str, **kwargs) -> Dict[str, Any]:
+         """Generate text using a model."""
+         if model_name not in self.available_models:
+             return {"status": "error", "message": f"Model {model_name} not available"}
+
          try:
              payload = {
                  "model": model_name,
                  "prompt": prompt,
+                 "stream": False,
+                 **kwargs
+             }
+             response = requests.post(f"{self.base_url}/api/generate", json=payload, timeout=120)
+             response.raise_for_status()
+             data = response.json()
+             return {
+                 "status": "success",
+                 "response": data.get('response', ''),
+                 "model": model_name,
+                 "usage": data.get('usage', {})
              }
          except Exception as e:
+             logging.error(f"Error generating response: {e}")
              return {"status": "error", "message": str(e)}
+
+     def health_check(self) -> Dict[str, Any]:
+         """Check the health of the Ollama API."""
+         try:
+             response = requests.get(f"{self.base_url}/api/tags", timeout=10)
+             response.raise_for_status()
+             return {"status": "healthy", "available_models": len(self.available_models)}
+         except Exception as e:
+             logging.error(f"Health check failed: {e}")
+             return {"status": "unhealthy", "error": str(e)}

  # Initialize Ollama manager
  ollama_manager = OllamaManager(OLLAMA_BASE_URL)

+ # HTML template for the chat interface (unchanged from original)
+ HTML_TEMPLATE = '''
+ <!DOCTYPE html>
+ <html lang="en">
+ <head>
+     <meta charset="UTF-8">
+     <meta name="viewport" content="width=device-width, initial-scale=1.0">
+     <title>OpenWebUI - Ollama Chat</title>
+     <style>
+         * {
+             margin: 0;
+             padding: 0;
+             box-sizing: border-box;
+         }
+         body {
+             font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
+             background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+             min-height: 100vh;
+             padding: 20px;
+         }
+         .container {
+             max-width: 1200px;
+             margin: 0 auto;
+             background: white;
+             border-radius: 20px;
+             box-shadow: 0 20px 40px rgba(0,0,0,0.1);
+             overflow: hidden;
+         }
+         .header {
+             background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+             color: white;
+             padding: 30px;
+             text-align: center;
+         }
+         .header h1 {
+             font-size: 2.5rem;
+             margin-bottom: 10px;
+             font-weight: 700;
+         }
+         .header p {
+             font-size: 1.1rem;
+             opacity: 0.9;
+         }
+         .controls {
+             padding: 20px 30px;
+             background: #f8f9fa;
+             border-bottom: 1px solid #e9ecef;
+             display: flex;
+             gap: 15px;
+             align-items: center;
+             flex-wrap: wrap;
+         }
+         .control-group {
+             display: flex;
+             align-items: center;
+             gap: 8px;
+         }
+         .control-group label {
+             font-weight: 600;
+             color: #495057;
+             min-width: 80px;
+         }
+         .control-group select,
+         .control-group input {
+             padding: 8px 12px;
+             border: 2px solid #e9ecef;
+             border-radius: 8px;
+             font-size: 14px;
+             transition: border-color 0.3s;
+         }
+         .control-group select:focus,
+         .control-group input:focus {
+             outline: none;
+             border-color: #667eea;
+         }
+         .chat-container {
+             height: 500px;
+             overflow-y: auto;
+             padding: 20px;
+             background: #fafbfc;
+         }
+         .message {
+             margin-bottom: 20px;
+             display: flex;
+             gap: 15px;
+         }
+         .message.user {
+             flex-direction: row-reverse;
+         }
+         .message-avatar {
+             width: 40px;
+             height: 40px;
+             border-radius: 50%;
+             display: flex;
+             align-items: center;
+             justify-content: center;
+             font-weight: bold;
+             color: white;
+             flex-shrink: 0;
+         }
+         .message.user .message-avatar {
+             background: #667eea;
+         }
+         .message.assistant .message-avatar {
+             background: #28a745;
+         }
+         .message-content {
+             background: white;
+             padding: 15px 20px;
+             border-radius: 18px;
+             max-width: 70%;
+             box-shadow: 0 2px 10px rgba(0,0,0,0.1);
+             line-height: 1.5;
+         }
+         .message.user .message-content {
+             background: #667eea;
+             color: white;
+         }
+         .message.assistant .message-content {
+             background: white;
+             color: #333;
+         }
+         .input-container {
+             padding: 20px 30px;
+             background: white;
+             border-top: 1px solid #e9ecef;
+         }
+         .input-form {
+             display: flex;
+             gap: 15px;
+         }
+         .input-field {
+             flex: 1;
+             padding: 15px 20px;
+             border: 2px solid #e9ecef;
+             border-radius: 25px;
+             font-size: 16px;
+             transition: border-color 0.3s;
+             resize: none;
+             min-height: 50px;
+             max-height: 120px;
+         }
+         .input-field:focus {
+             outline: none;
+             border-color: #667eea;
+         }
+         .send-button {
+             padding: 15px 30px;
+             background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+             color: white;
+             border: none;
+             border-radius: 25px;
+             font-size: 16px;
+             font-weight: 600;
+             cursor: pointer;
+             transition: transform 0.2s;
+             min-width: 100px;
+         }
+         .send-button:hover {
+             transform: translateY(-2px);
+         }
+         .send-button:disabled {
+             opacity: 0.6;
+             cursor: not-allowed;
+             transform: none;
+         }
+         .status {
+             text-align: center;
+             padding: 10px;
+             font-size: 14px;
+             color: #6c757d;
+         }
+         .status.error {
+             color: #dc3545;
+         }
+         .status.success {
+             color: #28a745;
+         }
+         .typing-indicator {
+             display: none;
+             padding: 15px 20px;
+             background: white;
+             border-radius: 18px;
+             color: #6c757d;
+             font-style: italic;
+         }
+         @media (max-width: 768px) {
+             .controls {
+                 flex-direction: column;
+                 align-items: stretch;
+             }
+             .control-group {
+                 justify-content: space-between;
+             }
+             .message-content {
+                 max-width: 85%;
+             }
+         }
+     </style>
+ </head>
+ <body>
+     <div class="container">
+         <div class="header">
+             <h1>🤖 OpenWebUI</h1>
+             <p>Chat with your local Ollama models through Hugging Face Spaces</p>
          </div>

+         <div class="controls">
+             <div class="control-group">
+                 <label for="model-select">Model:</label>
+                 <select id="model-select">
+                     <option value="">Select a model...</option>
+                 </select>
+             </div>
+             <div class="control-group">
+                 <label for="temperature">Temperature:</label>
+                 <input type="range" id="temperature" min="0" max="2" step="0.1" value="0.7">
+                 <span id="temp-value">0.7</span>
+             </div>
+             <div class="control-group">
+                 <label for="max-tokens">Max Tokens:</label>
+                 <input type="number" id="max-tokens" min="1" max="4096" value="2048">
+             </div>
          </div>

+         <div class="chat-container" id="chat-container">
+             <div class="message assistant">
+                 <div class="message-avatar">AI</div>
+                 <div class="message-content">
+                     Hello! I'm your AI assistant powered by Ollama. How can I help you today?
+                 </div>
+             </div>
          </div>

+         <div class="typing-indicator" id="typing-indicator">
+             AI is thinking...
          </div>

+         <div class="input-container">
+             <form class="input-form" id="chat-form">
+                 <textarea
+                     class="input-field"
+                     id="message-input"
+                     placeholder="Type your message here..."
+                     rows="1"
+                 ></textarea>
+                 <button type="submit" class="send-button" id="send-button">
+                     Send
+                 </button>
+             </form>
+         </div>

+         <div class="status" id="status"></div>
+     </div>

+     <script>
+         let conversationHistory = [];
+
+         document.addEventListener('DOMContentLoaded', function() {
+             loadModels();
+             setupEventListeners();
+             autoResizeTextarea();
+         });
+
+         async function loadModels() {
+             const modelSelect = document.getElementById('model-select');
+             modelSelect.innerHTML = '<option value="">Loading models...</option>';
+
+             try {
+                 const response = await fetch('/api/models');
+                 const data = await response.json();
+
+                 modelSelect.innerHTML = '<option value="">Select a model...</option>';
+
+                 if (data.status === 'success' && data.models.length > 0) {
+                     data.models.forEach(model => {
+                         const option = document.createElement('option');
+                         option.value = model;
+                         option.textContent = model;
+                         if (model === 'gemma-3-270m') {
+                             option.selected = true;
+                         }
+                         modelSelect.appendChild(option);
+                     });
+                     showStatus('Models loaded successfully', 'success');
+                 } else {
+                     modelSelect.innerHTML = '<option value="">No models available</option>';
+                     showStatus('No models available from API', 'error');
+                 }
+             } catch (error) {
+                 console.error('Error loading models:', error);
+                 modelSelect.innerHTML = '<option value="">No models available</option>';
+                 showStatus('Failed to load models: ' + error.message, 'error');
+             }
+         }
+
+         function setupEventListeners() {
+             document.getElementById('chat-form').addEventListener('submit', handleSubmit);
+             document.getElementById('temperature').addEventListener('input', function() {
+                 document.getElementById('temp-value').textContent = this.value;
+             });
+             document.getElementById('message-input').addEventListener('input', autoResizeTextarea);
+         }
+
+         function autoResizeTextarea() {
+             const textarea = document.getElementById('message-input');
+             textarea.style.height = 'auto';
+             textarea.style.height = Math.min(textarea.scrollHeight, 120) + 'px';
+         }
+
+         async function handleSubmit(e) {
+             e.preventDefault();
+
+             const messageInput = document.getElementById('message-input');
+             const message = messageInput.value.trim();
+
+             if (!message) return;
+
+             const model = document.getElementById('model-select').value;
+             const temperature = parseFloat(document.getElementById('temperature').value);
+             const maxTokens = parseInt(document.getElementById('max-tokens').value);
+
+             if (!model) {
+                 showStatus('Please select a model', 'error');
+                 return;
+             }
+
+             addMessage(message, 'user');
+             messageInput.value = '';
+             autoResizeTextarea();
+             showTypingIndicator(true);
+
+             try {
+                 const response = await fetch('/api/chat', {
+                     method: 'POST',
+                     headers: { 'Content-Type': 'application/json' },
+                     body: JSON.stringify({ model, prompt: message, temperature, max_tokens: maxTokens })
+                 });
+                 const data = await response.json();
+
+                 showTypingIndicator(false);
+
+                 if (data.status === 'success') {
+                     addMessage(data.response, 'assistant');
+                     showStatus(`Response generated using ${model}`, 'success');
+                 } else {
+                     addMessage('Sorry, I encountered an error while processing your request.', 'assistant');
+                     showStatus(`Error: ${data.message}`, 'error');
+                 }
+             } catch (error) {
+                 showTypingIndicator(false);
+                 addMessage('Sorry, I encountered a network error.', 'assistant');
+                 showStatus('Network error: ' + error.message, 'error');
+             }
+         }
+
+         function addMessage(content, sender) {
+             const chatContainer = document.getElementById('chat-container');
+             const messageDiv = document.createElement('div');
+             messageDiv.className = `message ${sender}`;
+
+             const avatar = document.createElement('div');
+             avatar.className = 'message-avatar';
+             avatar.textContent = sender === 'user' ? 'U' : 'AI';
+
+             const messageContent = document.createElement('div');
+             messageContent.className = 'message-content';
+             messageContent.textContent = content;
+
+             messageDiv.appendChild(avatar);
+             messageDiv.appendChild(messageContent);
+             chatContainer.appendChild(messageDiv);
+             chatContainer.scrollTop = chatContainer.scrollHeight;
+
+             conversationHistory.push({ role: sender, content: content });
+         }
+
+         function showTypingIndicator(show) {
+             const indicator = document.getElementById('typing-indicator');
+             indicator.style.display = show ? 'block' : 'none';
+             if (show) {
+                 const chatContainer = document.getElementById('chat-container');
+                 chatContainer.scrollTop = chatContainer.scrollHeight;
+             }
+         }
+
+         function showStatus(message, type = '') {
+             const statusDiv = document.getElementById('status');
+             statusDiv.textContent = message;
+             statusDiv.className = `status ${type}`;
+             setTimeout(() => {
+                 statusDiv.textContent = '';
+                 statusDiv.className = 'status';
+             }, 5000);
+         }
+     </script>
+ </body>
+ </html>
+ '''
+
+ @app.route('/')
+ def home():
+     """Main chat interface."""
+     return render_template_string(HTML_TEMPLATE, ollama_base_url=OLLAMA_BASE_URL, default_model=ALLOWED_MODELS)
+
+ @app.route('/api/chat', methods=['POST'])
+ def chat():
+     """Chat API endpoint."""
+     try:
+         data = request.get_json()
+         if not data or 'prompt' not in data or 'model' not in data:
+             return jsonify({"status": "error", "message": "Prompt and model are required"}), 400
+
+         prompt = data['prompt']
+         model = data['model']
+         temperature = data.get('temperature', TEMPERATURE)
+         max_tokens = data.get('max_tokens', MAX_TOKENS)
+
+         result = ollama_manager.generate(model, prompt, temperature=temperature, max_tokens=max_tokens)
+         return jsonify(result), 200 if result["status"] == "success" else 500
+     except Exception as e:
+         logging.error(f"Chat endpoint error: {e}")
+         return jsonify({"status": "error", "message": str(e)}), 500

  @app.route('/api/models', methods=['GET'])
+ def get_models():
+     """Get available models."""
      try:
          models = ollama_manager.list_models()
          return jsonify({
              "count": len(models)
          })
      except Exception as e:
+         logging.error(f"Models endpoint error: {e}")
          return jsonify({"status": "error", "message": str(e)}), 500

  @app.route('/health', methods=['GET'])
  def health_check():
+     """Health check endpoint."""
      try:
+         ollama_health = ollama_manager.health_check()
+         return jsonify({
+             "status": "healthy",
+             "ollama_api": ollama_health,
+             "timestamp": time.time()
+         })
      except Exception as e:
+         logging.error(f"Health check endpoint error: {e}")
          return jsonify({
              "status": "unhealthy",
+             "error": str(e),
+             "timestamp": time.time()
+         }), 500

  if __name__ == '__main__':
+     app.run(host='0.0.0.0', port=7860, debug=False)
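A quick way to exercise the endpoints this revision exposes (GET /api/models, POST /api/chat, GET /health) is a small client script. This is only a sketch: the base URL below is the same placeholder used in the removed cURL examples, and gemma-3-270m is assumed to be present in ALLOWED_MODELS.

# Hypothetical smoke test for the endpoints added in this commit.
# Replace BASE_URL with the actual Space URL before running.
import requests

BASE_URL = "https://your-space-url.hf.space"  # placeholder, not a real deployment

# List the models the server is willing to serve (GET /api/models)
models = requests.get(f"{BASE_URL}/api/models", timeout=30).json()
print("models:", models.get("models"), "count:", models.get("count"))

# Request a completion (POST /api/chat); temperature and max_tokens are optional
chat = requests.post(
    f"{BASE_URL}/api/chat",
    json={
        "model": "gemma-3-270m",   # assumed to be in ALLOWED_MODELS
        "prompt": "Hello, how are you?",
        "temperature": 0.7,
        "max_tokens": 256,
    },
    timeout=120,
).json()
print("response:", chat.get("response"))

# Check service and Ollama health (GET /health)
print(requests.get(f"{BASE_URL}/health", timeout=30).json())

Note that the /api/generate and /api/models/pull routes from the previous revision are removed in this commit, so existing clients should switch to /api/chat.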