tommytracx commited on
Commit
b82828c
·
verified ·
1 Parent(s): 28c2439

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +207 -386
app.py CHANGED
@@ -1,5 +1,5 @@
1
  # app.py
2
- from flask import Flask, request, jsonify, render_template_string
3
  import os
4
  import requests
5
  import json
@@ -11,7 +11,7 @@ app = Flask(__name__)
11
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
12
 
13
  # Configuration
14
- OLLAMA_BASE_URL = os.getenv('OLLAMA_BASE_URL', 'http://localhost:11434')
15
  ALLOWED_MODELS = os.getenv('ALLOWED_MODELS', 'llama2,llama2:13b,llama2:70b,codellama,neural-chat,gemma-3-270m').split(',')
16
  MAX_TOKENS = int(os.getenv('MAX_TOKENS', '2048'))
17
  TEMPERATURE = float(os.getenv('TEMPERATURE', '0.7'))
@@ -43,8 +43,21 @@ class OllamaManager:
43
  """Return the list of available models."""
44
  return self.available_models
45
 
46
- def generate(self, model_name: str, prompt: str, **kwargs) -> Dict[str, Any]:
47
- """Generate text using a model."""
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  if model_name not in self.available_models:
49
  return {"status": "error", "message": f"Model {model_name} not available"}
50
 
@@ -52,44 +65,53 @@ class OllamaManager:
52
  payload = {
53
  "model": model_name,
54
  "prompt": prompt,
55
- "stream": False,
56
  **kwargs
57
  }
58
- response = requests.post(f"{self.base_url}/api/generate", json=payload, timeout=120)
59
- response.raise_for_status()
60
- data = response.json()
61
- return {
62
- "status": "success",
63
- "response": data.get('response', ''),
64
- "model": model_name,
65
- "usage": data.get('usage', {})
66
- }
 
 
 
 
 
67
  except Exception as e:
68
  logging.error(f"Error generating response: {e}")
69
  return {"status": "error", "message": str(e)}
70
-
71
- def health_check(self) -> Dict[str, Any]:
72
- """Check the health of the Ollama API."""
73
- try:
74
- response = requests.get(f"{self.base_url}/api/tags", timeout=10)
75
- response.raise_for_status()
76
- return {"status": "healthy", "available_models": len(self.available_models)}
77
- except Exception as e:
78
- logging.error(f"Health check failed: {e}")
79
- return {"status": "unhealthy", "error": str(e)}
80
 
81
  # Initialize Ollama manager
82
  ollama_manager = OllamaManager(OLLAMA_BASE_URL)
83
 
84
- # HTML template for the chat interface (unchanged from original)
85
  HTML_TEMPLATE = '''
86
  <!DOCTYPE html>
87
  <html lang="en">
88
  <head>
89
  <meta charset="UTF-8">
90
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
91
- <title>OpenWebUI - Ollama Chat</title>
92
  <style>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  * {
94
  margin: 0;
95
  padding: 0;
@@ -97,385 +119,156 @@ HTML_TEMPLATE = '''
97
  }
98
  body {
99
  font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
100
- background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
 
101
  min-height: 100vh;
102
  padding: 20px;
103
  }
104
  .container {
105
- max-width: 1200px;
106
  margin: 0 auto;
107
- background: white;
108
  border-radius: 20px;
109
  box-shadow: 0 20px 40px rgba(0,0,0,0.1);
110
- overflow: hidden;
111
- }
112
- .header {
113
- background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
114
- color: white;
115
  padding: 30px;
116
- text-align: center;
 
 
 
 
 
 
 
 
 
 
117
  }
118
- .header h1 {
119
  font-size: 2.5rem;
120
- margin-bottom: 10px;
121
- font-weight: 700;
122
  }
123
- .header p {
124
  font-size: 1.1rem;
125
- opacity: 0.9;
126
- }
127
- .controls {
128
- padding: 20px 30px;
129
- background: #f8f9fa;
130
- border-bottom: 1px solid #e9ecef;
131
- display: flex;
132
- gap: 15px;
133
- align-items: center;
134
- flex-wrap: wrap;
135
- }
136
- .control-group {
137
- display: flex;
138
- align-items: center;
139
- gap: 8px;
140
- }
141
- .control-group label {
142
- font-weight: 600;
143
- color: #495057;
144
- min-width: 80px;
145
- }
146
- .control-group select,
147
- .control-group input {
148
- padding: 8px 12px;
149
- border: 2px solid #e9ecef;
150
- border-radius: 8px;
151
- font-size: 14px;
152
- transition: border-color 0.3s;
153
- }
154
- .control-group select:focus,
155
- .control-group input:focus {
156
- outline: none;
157
- border-color: #667eea;
158
- }
159
- .chat-container {
160
- height: 500px;
161
- overflow-y: auto;
162
- padding: 20px;
163
- background: #fafbfc;
164
- }
165
- .message {
166
  margin-bottom: 20px;
167
- display: flex;
168
- gap: 15px;
169
  }
170
- .message.user {
171
- flex-direction: row-reverse;
172
- }
173
- .message-avatar {
174
- width: 40px;
175
- height: 40px;
176
- border-radius: 50%;
177
- display: flex;
178
- align-items: center;
179
- justify-content: center;
180
- font-weight: bold;
181
- color: white;
182
- flex-shrink: 0;
183
- }
184
- .message.user .message-avatar {
185
- background: #667eea;
186
- }
187
- .message.assistant .message-avatar {
188
- background: #28a745;
189
- }
190
- .message-content {
191
- background: white;
192
- padding: 15px 20px;
193
- border-radius: 18px;
194
- max-width: 70%;
195
- box-shadow: 0 2px 10px rgba(0,0,0,0.1);
196
- line-height: 1.5;
197
- }
198
- .message.user .message-content {
199
- background: #667eea;
200
- color: white;
201
- }
202
- .message.assistant .message-content {
203
- background: white;
204
- color: #333;
205
- }
206
- .input-container {
207
- padding: 20px 30px;
208
- background: white;
209
- border-top: 1px solid #e9ecef;
210
- }
211
- .input-form {
212
- display: flex;
213
- gap: 15px;
214
- }
215
- .input-field {
216
- flex: 1;
217
- padding: 15px 20px;
218
- border: 2px solid #e9ecef;
219
- border-radius: 25px;
220
- font-size: 16px;
221
- transition: border-color 0.3s;
222
- resize: none;
223
- min-height: 50px;
224
- max-height: 120px;
225
- }
226
- .input-field:focus {
227
- outline: none;
228
- border-color: #667eea;
229
  }
230
- .send-button {
231
- padding: 15px 30px;
232
- background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
233
  color: white;
234
- border: none;
235
- border-radius: 25px;
236
- font-size: 16px;
237
- font-weight: 600;
238
- cursor: pointer;
239
- transition: transform 0.2s;
240
- min-width: 100px;
241
  }
242
- .send-button:hover {
243
- transform: translateY(-2px);
 
 
 
244
  }
245
- .send-button:disabled {
246
- opacity: 0.6;
247
- cursor: not-allowed;
248
- transform: none;
 
249
  }
250
- .status {
251
- text-align: center;
252
- padding: 10px;
253
  font-size: 14px;
254
- color: #6c757d;
255
- }
256
- .status.error {
257
- color: #dc3545;
258
- }
259
- .status.success {
260
- color: #28a745;
261
  }
262
- .typing-indicator {
263
- display: none;
264
- padding: 15px 20px;
265
- background: white;
266
- border-radius: 18px;
267
- color: #6c757d;
268
- font-style: italic;
269
  }
270
  @media (max-width: 768px) {
271
- .controls {
272
- flex-direction: column;
273
- align-items: stretch;
274
  }
275
- .control-group {
276
- justify-content: space-between;
277
- }
278
- .message-content {
279
- max-width: 85%;
280
  }
281
  }
282
  </style>
283
  </head>
284
  <body>
285
  <div class="container">
286
- <div class="header">
287
- <h1>🤖 OpenWebUI</h1>
288
- <p>Chat with your local Ollama models through Hugging Face Spaces</p>
289
- </div>
290
 
291
- <div class="controls">
292
- <div class="control-group">
293
- <label for="model-select">Model:</label>
294
- <select id="model-select">
295
- <option value="">Select a model...</option>
296
- </select>
297
- </div>
298
- <div class="control-group">
299
- <label for="temperature">Temperature:</label>
300
- <input type="range" id="temperature" min="0" max="2" step="0.1" value="0.7">
301
- <span id="temp-value">0.7</span>
302
- </div>
303
- <div class="control-group">
304
- <label for="max-tokens">Max Tokens:</label>
305
- <input type="number" id="max-tokens" min="1" max="4096" value="2048">
306
- </div>
307
  </div>
308
 
309
- <div class="chat-container" id="chat-container">
310
- <div class="message assistant">
311
- <div class="message-avatar">AI</div>
312
- <div class="message-content">
313
- Hello! I'm your AI assistant powered by Ollama. How can I help you today?
314
- </div>
315
- </div>
316
  </div>
317
 
318
- <div class="typing-indicator" id="typing-indicator">
319
- AI is thinking...
 
 
 
 
320
  </div>
321
 
322
- <div class="input-container">
323
- <form class="input-form" id="chat-form">
324
- <textarea
325
- class="input-field"
326
- id="message-input"
327
- placeholder="Type your message here..."
328
- rows="1"
329
- ></textarea>
330
- <button type="submit" class="send-button" id="send-button">
331
- Send
332
- </button>
333
- </form>
334
  </div>
335
 
336
- <div class="status" id="status"></div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
337
  </div>
338
 
339
  <script>
340
- let conversationHistory = [];
341
-
342
  document.addEventListener('DOMContentLoaded', function() {
343
- loadModels();
344
- setupEventListeners();
345
- autoResizeTextarea();
346
- });
347
-
348
- async function loadModels() {
349
- const modelSelect = document.getElementById('model-select');
350
- modelSelect.innerHTML = '<option value="">Loading models...</option>';
351
-
352
- try {
353
- const response = await fetch('/api/models');
354
- const data = await response.json();
355
-
356
- modelSelect.innerHTML = '<option value="">Select a model...</option>';
357
-
358
- if (data.status === 'success' && data.models.length > 0) {
359
- data.models.forEach(model => {
360
- const option = document.createElement('option');
361
- option.value = model;
362
- option.textContent = model;
363
- if (model === 'gemma-3-270m') {
364
- option.selected = true;
365
- }
366
- modelSelect.appendChild(option);
367
- });
368
- showStatus('Models loaded successfully', 'success');
369
- } else {
370
- modelSelect.innerHTML = '<option value="">No models available</option>';
371
- showStatus('No models available from API', 'error');
372
- }
373
- } catch (error) {
374
- console.error('Error loading models:', error);
375
- modelSelect.innerHTML = '<option value="">No models available</option>';
376
- showStatus('Failed to load models: ' + error.message, 'error');
377
- }
378
- }
379
-
380
- function setupEventListeners() {
381
- document.getElementById('chat-form').addEventListener('submit', handleSubmit);
382
- document.getElementById('temperature').addEventListener('input', function() {
383
- document.getElementById('temp-value').textContent = this.value;
384
  });
385
- document.getElementById('message-input').addEventListener('input', autoResizeTextarea);
386
- }
387
-
388
- function autoResizeTextarea() {
389
- const textarea = document.getElementById('message-input');
390
- textarea.style.height = 'auto';
391
- textarea.style.height = Math.min(textarea.scrollHeight, 120) + 'px';
392
- }
393
-
394
- async function handleSubmit(e) {
395
- e.preventDefault();
396
-
397
- const messageInput = document.getElementById('message-input');
398
- const message = messageInput.value.trim();
399
-
400
- if (!message) return;
401
-
402
- const model = document.getElementById('model-select').value;
403
- const temperature = parseFloat(document.getElementById('temperature').value);
404
- const maxTokens = parseInt(document.getElementById('max-tokens').value);
405
-
406
- if (!model) {
407
- showStatus('Please select a model', 'error');
408
- return;
409
- }
410
-
411
- addMessage(message, 'user');
412
- messageInput.value = '';
413
- autoResizeTextarea();
414
- showTypingIndicator(true);
415
-
416
- try {
417
- const response = await fetch('/api/chat', {
418
- method: 'POST',
419
- headers: { 'Content-Type': 'application/json' },
420
- body: JSON.stringify({ model, prompt: message, temperature, max_tokens: maxTokens })
421
- });
422
- const data = await response.json();
423
-
424
- showTypingIndicator(false);
425
-
426
- if (data.status === 'success') {
427
- addMessage(data.response, 'assistant');
428
- showStatus(`Response generated using ${model}`, 'success');
429
- } else {
430
- addMessage('Sorry, I encountered an error while processing your request.', 'assistant');
431
- showStatus(`Error: ${data.message}`, 'error');
432
- }
433
- } catch (error) {
434
- showTypingIndicator(false);
435
- addMessage('Sorry, I encountered a network error.', 'assistant');
436
- showStatus('Network error: ' + error.message, 'error');
437
- }
438
- }
439
-
440
- function addMessage(content, sender) {
441
- const chatContainer = document.getElementById('chat-container');
442
- const messageDiv = document.createElement('div');
443
- messageDiv.className = `message ${sender}`;
444
-
445
- const avatar = document.createElement('div');
446
- avatar.className = 'message-avatar';
447
- avatar.textContent = sender === 'user' ? 'U' : 'AI';
448
-
449
- const messageContent = document.createElement('div');
450
- messageContent.className = 'message-content';
451
- messageContent.textContent = content;
452
-
453
- messageDiv.appendChild(avatar);
454
- messageDiv.appendChild(messageContent);
455
- chatContainer.appendChild(messageDiv);
456
- chatContainer.scrollTop = chatContainer.scrollHeight;
457
-
458
- conversationHistory.push({ role: sender, content: content });
459
- }
460
-
461
- function showTypingIndicator(show) {
462
- const indicator = document.getElementById('typing-indicator');
463
- indicator.style.display = show ? 'block' : 'none';
464
- if (show) {
465
- const chatContainer = document.getElementById('chat-container');
466
- chatContainer.scrollTop = chatContainer.scrollHeight;
467
  }
468
- }
469
-
470
- function showStatus(message, type = '') {
471
- const statusDiv = document.getElementById('status');
472
- statusDiv.textContent = message;
473
- statusDiv.className = `status ${type}`;
474
- setTimeout(() => {
475
- statusDiv.textContent = '';
476
- statusDiv.className = 'status';
477
- }, 5000);
478
- }
479
  </script>
480
  </body>
481
  </html>
@@ -483,31 +276,12 @@ HTML_TEMPLATE = '''
483
 
484
  @app.route('/')
485
  def home():
486
- """Main chat interface."""
487
- return render_template_string(HTML_TEMPLATE, ollama_base_url=OLLAMA_BASE_URL, default_model=ALLOWED_MODELS)
488
-
489
- @app.route('/api/chat', methods=['POST'])
490
- def chat():
491
- """Chat API endpoint."""
492
- try:
493
- data = request.get_json()
494
- if not data or 'prompt' not in data or 'model' not in data:
495
- return jsonify({"status": "error", "message": "Prompt and model are required"}), 400
496
-
497
- prompt = data['prompt']
498
- model = data['model']
499
- temperature = data.get('temperature', TEMPERATURE)
500
- max_tokens = data.get('max_tokens', MAX_TOKENS)
501
-
502
- result = ollama_manager.generate(model, prompt, temperature=temperature, max_tokens=max_tokens)
503
- return jsonify(result), 200 if result["status"] == "success" else 500
504
- except Exception as e:
505
- logging.error(f"Chat endpoint error: {e}")
506
- return jsonify({"status": "error", "message": str(e)}), 500
507
 
508
  @app.route('/api/models', methods=['GET'])
509
- def get_models():
510
- """Get available models."""
511
  try:
512
  models = ollama_manager.list_models()
513
  return jsonify({
@@ -519,23 +293,70 @@ def get_models():
519
  logging.error(f"Models endpoint error: {e}")
520
  return jsonify({"status": "error", "message": str(e)}), 500
521
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
522
  @app.route('/health', methods=['GET'])
523
  def health_check():
524
  """Health check endpoint."""
525
  try:
526
- ollama_health = ollama_manager.health_check()
 
527
  return jsonify({
528
  "status": "healthy",
529
- "ollama_api": ollama_health,
 
530
  "timestamp": time.time()
531
  })
532
  except Exception as e:
533
  logging.error(f"Health check endpoint error: {e}")
534
  return jsonify({
535
  "status": "unhealthy",
 
536
  "error": str(e),
537
  "timestamp": time.time()
538
- }), 500
539
 
540
  if __name__ == '__main__':
541
  app.run(host='0.0.0.0', port=7860, debug=False)
 
1
  # app.py
2
from flask import Flask, request, jsonify, render_template_string, Response
3
  import os
4
  import requests
5
  import json
 
11
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
12
 
13
  # Configuration
14
+ OLLAMA_BASE_URL = os.getenv('OLLAMA_BASE_URL', 'https://huggingface.co/spaces/tommytracx/ollama-api')
15
  ALLOWED_MODELS = os.getenv('ALLOWED_MODELS', 'llama2,llama2:13b,llama2:70b,codellama,neural-chat,gemma-3-270m').split(',')
16
  MAX_TOKENS = int(os.getenv('MAX_TOKENS', '2048'))
17
  TEMPERATURE = float(os.getenv('TEMPERATURE', '0.7'))
 
43
  """Return the list of available models."""
44
  return self.available_models
45
 
46
+ def pull_model(self, model_name: str) -> Dict[str, Any]:
47
+ """Pull a model from Ollama."""
48
+ if model_name not in ALLOWED_MODELS:
49
+ return {"status": "error", "message": f"Model {model_name} not in allowed list"}
50
+
51
+ try:
52
+ response = requests.post(f"{self.base_url}/api/pull", json={"name": model_name}, timeout=300)
53
+ response.raise_for_status()
54
+ return {"status": "success", "model": model_name}
55
+ except Exception as e:
56
+ logging.error(f"Error pulling model {model_name}: {e}")
57
+ return {"status": "error", "message": str(e)}
58
+
59
+ def generate(self, model_name: str, prompt: str, stream: bool = False, **kwargs) -> Any:
60
+ """Generate text using a model, with optional streaming."""
61
  if model_name not in self.available_models:
62
  return {"status": "error", "message": f"Model {model_name} not available"}
63
 
 
65
  payload = {
66
  "model": model_name,
67
  "prompt": prompt,
68
+ "stream": stream,
69
  **kwargs
70
  }
71
+ if stream:
72
+ response = requests.post(f"{self.base_url}/api/generate", json=payload, stream=True, timeout=120)
73
+ response.raise_for_status()
74
+ return response
75
+ else:
76
+ response = requests.post(f"{self.base_url}/api/generate", json=payload, timeout=120)
77
+ response.raise_for_status()
78
+ data = response.json()
79
+ return {
80
+ "status": "success",
81
+ "response": data.get('response', ''),
82
+ "model": model_name,
83
+ "usage": data.get('usage', {})
84
+ }
85
  except Exception as e:
86
  logging.error(f"Error generating response: {e}")
87
  return {"status": "error", "message": str(e)}
 
 
 
 
 
 
 
 
 
 
88
 
89
  # Initialize Ollama manager
90
  ollama_manager = OllamaManager(OLLAMA_BASE_URL)
91
 
92
+ # HTML template for the home page with improved UI
93
  HTML_TEMPLATE = '''
94
  <!DOCTYPE html>
95
  <html lang="en">
96
  <head>
97
  <meta charset="UTF-8">
98
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
99
+ <title>Ollama API Space</title>
100
  <style>
101
+ :root {
102
+ --primary-color: #667eea;
103
+ --secondary-color: #764ba2;
104
+ --text-color: #333;
105
+ --bg-color: #fafbfc;
106
+ --border-color: #e9ecef;
107
+ }
108
+ .dark-mode {
109
+ --primary-color: #3b4a8c;
110
+ --secondary-color: #4a2e6b;
111
+ --text-color: #f0f0f0;
112
+ --bg-color: #1a1a1a;
113
+ --border-color: #4a4a4a;
114
+ }
115
  * {
116
  margin: 0;
117
  padding: 0;
 
119
  }
120
  body {
121
  font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
122
+ background: linear-gradient(135deg, var(--primary-color) 0%, var(--secondary-color) 100%);
123
+ color: var(--text-color);
124
  min-height: 100vh;
125
  padding: 20px;
126
  }
127
  .container {
128
+ max-width: 800px;
129
  margin: 0 auto;
130
+ background: var(--bg-color);
131
  border-radius: 20px;
132
  box-shadow: 0 20px 40px rgba(0,0,0,0.1);
 
 
 
 
 
133
  padding: 30px;
134
+ position: relative;
135
+ }
136
+ .theme-toggle {
137
+ position: absolute;
138
+ top: 10px;
139
+ right: 10px;
140
+ background: none;
141
+ border: none;
142
+ cursor: pointer;
143
+ font-size: 1.2rem;
144
+ color: var(--text-color);
145
  }
146
+ h1 {
147
  font-size: 2.5rem;
148
+ margin-bottom: 20px;
149
+ text-align: center;
150
  }
151
+ p {
152
  font-size: 1.1rem;
153
+ line-height: 1.6;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
  margin-bottom: 20px;
 
 
155
  }
156
+ .endpoint {
157
+ background: var(--border-color);
158
+ padding: 15px;
159
+ margin: 10px 0;
160
+ border-radius: 8px;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
  }
162
+ .method {
163
+ background: var(--primary-color);
 
164
  color: white;
165
+ padding: 4px 10px;
166
+ border-radius: 4px;
167
+ font-size: 14px;
168
+ margin-right: 10px;
 
 
 
169
  }
170
+ .url {
171
+ font-family: monospace;
172
+ background: var(--bg-color);
173
+ padding: 4px 8px;
174
+ border-radius: 4px;
175
  }
176
+ pre {
177
+ background: var(--border-color);
178
+ padding: 15px;
179
+ border-radius: 8px;
180
+ overflow-x: auto;
181
  }
182
+ code {
183
+ font-family: monospace;
 
184
  font-size: 14px;
 
 
 
 
 
 
 
185
  }
186
+ .dark-mode pre, .dark-mode .endpoint {
187
+ background: #2a2a2a;
 
 
 
 
 
188
  }
189
  @media (max-width: 768px) {
190
+ .container {
191
+ padding: 20px;
 
192
  }
193
+ h1 {
194
+ font-size: 2rem;
 
 
 
195
  }
196
  }
197
  </style>
198
  </head>
199
  <body>
200
  <div class="container">
201
+ <button class="theme-toggle" id="theme-toggle">🌙</button>
202
+ <h1>🚀 Ollama API Space</h1>
203
+ <p>This Space provides a robust API for managing and interacting with Ollama models, optimized for integration with OpenWebUI and other clients.</p>
 
204
 
205
+ <h2>Available Endpoints</h2>
206
+
207
+ <div class="endpoint">
208
+ <span class="method">GET</span> <span class="url">/api/models</span>
209
+ <p>List all available models filtered by ALLOWED_MODELS.</p>
210
+ <p><strong>Response:</strong> <code>{"status": "success", "models": [...], "count": N}</code></p>
 
 
 
 
 
 
 
 
 
 
211
  </div>
212
 
213
+ <div class="endpoint">
214
+ <span class="method">POST</span> <span class="url">/api/models/pull</span>
215
+ <p>Pull a model from Ollama, restricted to ALLOWED_MODELS.</p>
216
+ <p><strong>Body:</strong> <code>{"name": "model_name"}</code></p>
217
+ <p><strong>Response:</strong> <code>{"status": "success", "model": "model_name"}</code></p>
 
 
218
  </div>
219
 
220
+ <div class="endpoint">
221
+ <span class="method">POST</span> <span class="url">/api/generate</span>
222
+ <p>Generate text using a model, with optional streaming.</p>
223
+ <p><strong>Body:</strong> <code>{"model": "model_name", "prompt": "your prompt", "stream": boolean}</code></p>
224
+ <p><strong>Response (non-streaming):</strong> <code>{"status": "success", "response": "...", "model": "...", "usage": {...}}</code></p>
225
+ <p><strong>Response (streaming):</strong> Stream of JSON objects</p>
226
  </div>
227
 
228
+ <div class="endpoint">
229
+ <span class="method">GET</span> <span class="url">/health</span>
230
+ <p>Health check endpoint for the API and Ollama connection.</p>
231
+ <p><strong>Response:</strong> <code>{"status": "healthy", "ollama_connection": "connected", "available_models": N}</code></p>
 
 
 
 
 
 
 
 
232
  </div>
233
 
234
+ <h2>Usage Examples</h2>
235
+ <p>Use this API with OpenWebUI or any REST client. Ensure models are in ALLOWED_MODELS: {{ allowed_models }}.</p>
236
+
237
+ <h3>cURL Examples</h3>
238
+ <pre>
239
+ # List models
240
+ curl {{ ollama_base_url }}/api/models
241
+
242
+ # Pull a model
243
+ curl -X POST {{ ollama_base_url }}/api/models/pull \
244
+ -H "Content-Type: application/json" \
245
+ -d '{"name": "gemma-3-270m"}'
246
+
247
+ # Generate text (non-streaming)
248
+ curl -X POST {{ ollama_base_url }}/api/generate \
249
+ -H "Content-Type: application/json" \
250
+ -d '{"model": "gemma-3-270m", "prompt": "Write a Python script"}'
251
+
252
+ # Generate text (streaming)
253
+ curl -X POST {{ ollama_base_url }}/api/generate \
254
+ -H "Content-Type: application/json" \
255
+ -d '{"model": "gemma-3-270m", "prompt": "Write a Python script", "stream": true}'
256
+ </pre>
257
  </div>
258
 
259
  <script>
 
 
260
  document.addEventListener('DOMContentLoaded', function() {
261
+ const themeToggle = document.getElementById('theme-toggle');
262
+ themeToggle.addEventListener('click', function() {
263
+ document.body.classList.toggle('dark-mode');
264
+ themeToggle.textContent = document.body.classList.contains('dark-mode') ? '☀️' : '🌙';
265
+ localStorage.setItem('theme', document.body.classList.contains('dark-mode') ? 'dark' : 'light');
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
266
  });
267
+ if (localStorage.getItem('theme') === 'dark') {
268
+ document.body.classList.add('dark-mode');
269
+ themeToggle.textContent = '☀️';
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
270
  }
271
+ });
 
 
 
 
 
 
 
 
 
 
272
  </script>
273
  </body>
274
  </html>
 
276
 
277
  @app.route('/')
278
  def home():
279
+ """Home page with API documentation."""
280
+ return render_template_string(HTML_TEMPLATE, ollama_base_url=OLLAMA_BASE_URL, allowed_models=', '.join(ALLOWED_MODELS))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
281
 
282
  @app.route('/api/models', methods=['GET'])
283
+ def list_models():
284
+ """List all available models."""
285
  try:
286
  models = ollama_manager.list_models()
287
  return jsonify({
 
293
  logging.error(f"Models endpoint error: {e}")
294
  return jsonify({"status": "error", "message": str(e)}), 500
295
 
296
+ @app.route('/api/models/pull', methods=['POST'])
297
+ def pull_model():
298
+ """Pull a model from Ollama."""
299
+ try:
300
+ data = request.get_json()
301
+ if not data or 'name' not in data:
302
+ return jsonify({"status": "error", "message": "Model name is required"}), 400
303
+
304
+ model_name = data['name']
305
+ if model_name not in ALLOWED_MODELS:
306
+ return jsonify({"status": "error", "message": f"Model {model_name} not in allowed list"}), 400
307
+
308
+ result = ollama_manager.pull_model(model_name)
309
+ return jsonify(result), 200 if result["status"] == "success" else 500
310
+ except Exception as e:
311
+ logging.error(f"Pull model endpoint error: {e}")
312
+ return jsonify({"status": "error", "message": str(e)}), 500
313
+
314
+ @app.route('/api/generate', methods=['POST'])
315
+ def generate_text():
316
+ """Generate text using a model, with optional streaming."""
317
+ try:
318
+ data = request.get_json()
319
+ if not data or 'model' not in data or 'prompt' not in data:
320
+ return jsonify({"status": "error", "message": "Model name and prompt are required"}), 400
321
+
322
+ model_name = data['model']
323
+ prompt = data['prompt']
324
+ stream = data.get('stream', False)
325
+ kwargs = {k: v for k in data if k not in ['model', 'prompt', 'stream']}
326
+
327
+ result = ollama_manager.generate(model_name, prompt, stream=stream, **kwargs)
328
+
329
+ if stream and isinstance(result, requests.Response):
330
+ def generate_stream():
331
+ for chunk in result.iter_content(chunk_size=None):
332
+ yield chunk
333
+ return Response(generate_stream(), content_type='application/json')
334
+ else:
335
+ return jsonify(result), 200 if result["status"] == "success" else 500
336
+ except Exception as e:
337
+ logging.error(f"Generate endpoint error: {e}")
338
+ return jsonify({"status": "error", "message": str(e)}), 500
339
+
340
  @app.route('/health', methods=['GET'])
341
  def health_check():
342
  """Health check endpoint."""
343
  try:
344
+ response = requests.get(f"{OLLAMA_BASE_URL}/api/tags", timeout=5)
345
+ response.raise_for_status()
346
  return jsonify({
347
  "status": "healthy",
348
+ "ollama_connection": "connected",
349
+ "available_models": len(ollama_manager.available_models),
350
  "timestamp": time.time()
351
  })
352
  except Exception as e:
353
  logging.error(f"Health check endpoint error: {e}")
354
  return jsonify({
355
  "status": "unhealthy",
356
+ "ollama_connection": "failed",
357
  "error": str(e),
358
  "timestamp": time.time()
359
+ }), 503
360
 
361
  if __name__ == '__main__':
362
  app.run(host='0.0.0.0', port=7860, debug=False)