File size: 1,348 Bytes
ca28016
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#!/usr/bin/env python3
"""Fast Response Wrapper for QWEN2GOLEM"""

import time
import json
import hashlib
from functools import lru_cache

# Module-level in-memory response cache.
# Maps a SHA-256 hex digest of the prompt to an entry dict of the form
# {'response': <str>, 'timestamp': <float, time.time() at insertion>}.
response_cache = {}

def get_cached_response(prompt_hash, ttl=300):
    """Return the cached response for *prompt_hash*, or None.

    Args:
        prompt_hash: SHA-256 hex digest used as the cache key.
        ttl: Freshness window in seconds (default 300 = 5 minutes).

    Returns:
        The cached response if a fresh entry exists, otherwise None.
        Stale entries are treated as misses but are left in place;
        size-based eviction happens in cache_response.
    """
    # Single lookup via .get() instead of `in` followed by indexing.
    entry = response_cache.get(prompt_hash)
    if entry is not None and time.time() - entry['timestamp'] < ttl:
        return entry['response']
    return None

def cache_response(prompt_hash, response, max_entries=100):
    """Store *response* in the module cache under *prompt_hash*.

    Args:
        prompt_hash: SHA-256 hex digest used as the cache key.
        response: The response object to cache.
        max_entries: Upper bound on cache size after this call
            (default 100, matching the original hard-coded limit).

    Oldest entries (by insertion timestamp) are evicted until the
    cache is back within *max_entries*.
    """
    response_cache[prompt_hash] = {
        'response': response,
        'timestamp': time.time(),
    }
    # Loop (rather than a single deletion) so the size invariant holds
    # even if the cache somehow grew past the bound between calls.
    while len(response_cache) > max_entries:
        oldest_key = min(response_cache,
                         key=lambda k: response_cache[k]['timestamp'])
        del response_cache[oldest_key]

def fast_generate(prompt, use_cache=True):
    """Generate a response for *prompt* with read-through caching.

    Args:
        prompt: The input prompt string.
        use_cache: When True (default), serve/refresh the module cache.

    Returns:
        The cached response on a fresh hit, otherwise a newly
        generated (placeholder) response string.
    """
    # Key on a digest so arbitrarily long prompts map to fixed-size keys.
    prompt_hash = hashlib.sha256(prompt.encode()).hexdigest()

    if use_cache:
        cached = get_cached_response(prompt_hash)
        # BUGFIX: compare against None explicitly — a legitimately falsy
        # cached response (e.g. the empty string) must still count as a
        # hit; the old truthiness test regenerated it on every call.
        if cached is not None:
            return cached

    # Generate response (this would call the actual generator)
    # For now, return a placeholder
    response = f"Fast response to: {prompt[:50]}..."

    if use_cache:
        cache_response(prompt_hash, response)

    return response