#!/usr/bin/env python3
"""Fast Response Wrapper for QWEN2GOLEM"""
import time
import json
import hashlib
from functools import lru_cache

# In-memory response cache: SHA-256 hex digest of prompt -> {'response', 'timestamp'}
response_cache = {}

# Cache tuning knobs (previously hard-coded inline).
CACHE_TTL_SECONDS = 300   # entries older than this are treated as misses
CACHE_MAX_ENTRIES = 100   # soft cap; the single oldest entry is evicted when exceeded


def get_cached_response(prompt_hash):
    """Return the cached response for *prompt_hash*, or None on a miss.

    A hit counts only if the entry is younger than CACHE_TTL_SECONDS.
    Expired entries are deleted on access so they free their slot
    immediately (bug fix: the original left stale entries in the dict
    until size-based eviction happened to pick them).
    """
    entry = response_cache.get(prompt_hash)
    if entry is None:
        return None
    if time.time() - entry['timestamp'] < CACHE_TTL_SECONDS:
        return entry['response']
    # Expired — evict so the slot is reusable right away.
    del response_cache[prompt_hash]
    return None


def cache_response(prompt_hash, response):
    """Store *response* under *prompt_hash* with the current timestamp.

    When the cache grows past CACHE_MAX_ENTRIES, the oldest entry is
    evicted (O(n) scan — acceptable at this size).
    """
    response_cache[prompt_hash] = {
        'response': response,
        'timestamp': time.time(),
    }
    # Limit cache size
    if len(response_cache) > CACHE_MAX_ENTRIES:
        oldest_key = min(response_cache, key=lambda k: response_cache[k]['timestamp'])
        del response_cache[oldest_key]


def fast_generate(prompt, use_cache=True):
    """Generate a response for *prompt*, serving repeats from the cache.

    The cache is keyed on the SHA-256 of the UTF-8-encoded prompt.
    NOTE: the actual generator is not wired in yet — a placeholder
    string is returned for every cache miss.
    """
    prompt_hash = hashlib.sha256(prompt.encode()).hexdigest()

    if use_cache:
        cached = get_cached_response(prompt_hash)
        # Bug fix: compare against None, not truthiness — an empty or
        # otherwise falsy cached response is still a valid hit.
        if cached is not None:
            return cached

    # Generate response (this would call the actual generator)
    # For now, return a placeholder
    response = f"Fast response to: {prompt[:50]}..."

    if use_cache:
        cache_response(prompt_hash, response)
    return response