golem-flask-backend / fast_wrapper.py
mememechez's picture
Deploy final cleaned source code
ca28016
#!/usr/bin/env python3
"""Fast Response Wrapper for QWEN2GOLEM"""
import time
import json
import hashlib
from functools import lru_cache
# In-memory response cache shared by the helpers below.
# Maps a prompt hash to a dict holding the cached 'response' and the
# 'timestamp' (a time.time() value) at which it was stored.
response_cache = {}
def get_cached_response(prompt_hash, max_age=300):
    """Return the cached response for ``prompt_hash`` if it is still fresh.

    Args:
        prompt_hash: Key under which the response was stored in
            ``response_cache``.
        max_age: Maximum age of a usable entry, in seconds. Defaults to
            300 (5 minutes).

    Returns:
        The cached response, or ``None`` when there is no entry for
        ``prompt_hash`` or the entry is ``max_age`` seconds old or older.
    """
    entry = response_cache.get(prompt_hash)
    if entry is None:
        return None

    if time.time() - entry['timestamp'] < max_age:
        return entry['response']

    # The entry has expired: drop it now so stale data does not sit in the
    # cache until size-based eviction happens to pick it.
    response_cache.pop(prompt_hash, None)
    return None
def cache_response(prompt_hash, response, max_size=100):
    """Store ``response`` in ``response_cache`` under ``prompt_hash``.

    The entry is stamped with the current ``time.time()``. After the insert,
    the entries with the oldest timestamps are evicted until the cache holds
    at most ``max_size`` entries.

    Args:
        prompt_hash: Key to store the response under.
        response: The response value to cache.
        max_size: Maximum number of entries to keep. Defaults to 100.
    """
    response_cache[prompt_hash] = {
        'response': response,
        'timestamp': time.time(),
    }

    # Evict in a loop rather than once, so the cache also recovers when it
    # is already over the limit. The emptiness check keeps min() from
    # raising if max_size is negative.
    while response_cache and len(response_cache) > max_size:
        oldest_key = min(
            response_cache,
            key=lambda key: response_cache[key]['timestamp'],
        )
        del response_cache[oldest_key]
def fast_generate(prompt, use_cache=True):
    """Generate a response for ``prompt``, optionally served from the cache.

    Args:
        prompt: The prompt text. It is hashed with SHA-256 to form the
            cache key.
        use_cache: When true, return a cached response if one is available
            and store newly generated responses in the cache.

    Returns:
        The cached or newly generated response string.
    """
    prompt_hash = hashlib.sha256(prompt.encode()).hexdigest()

    if use_cache:
        cached = get_cached_response(prompt_hash)
        # Compare against None explicitly: an empty-string response is a
        # valid cache hit and must not be regenerated.
        if cached is not None:
            return cached

    # Generate response (this would call the actual generator).
    # For now, return a placeholder.
    response = f"Fast response to: {prompt[:50]}..."

    if use_cache:
        cache_response(prompt_hash, response)
    return response