"""
HuggingFace Space specific helpers for LLM operations.
Contains: HF InferenceClient initialization and response generation.
"""

import os
from typing import Optional
from huggingface_hub import InferenceClient

from helpers_SHARED import CONFIG, IS_HF_SPACE

# ============================================================================
# HUGGINGFACE LLM INITIALIZATION
# ============================================================================

hf_client = None
LLM_NAME = None

def init_hf_llm(model_name: Optional[str] = None):
    """Initialize HuggingFace InferenceClient for cloud LLM. Optionally specify model_name."""
    global hf_client, LLM_NAME
    
    if not IS_HF_SPACE:
        print("ℹ️ Not running on HF Space, skipping HF client initialization")
        return None, None
    
    # Use provided model_name or fallback to config
    if model_name is not None:
        LLM_NAME = model_name.split("/")[-1]
        # Patch CONFIG for downstream use
        CONFIG["hf_model"] = model_name
    else:
        LLM_NAME = CONFIG["hf_model"].split("/")[-1]
    try:
        hf_client = InferenceClient(token=os.getenv("HF_token"))
        print(f"βœ“ HuggingFace InferenceClient initialized successfully for {LLM_NAME}")
        return hf_client, LLM_NAME
    except Exception as e:
        print(f"βœ— Warning: HuggingFace InferenceClient not available: {e}")
        hf_client = None
        return None, LLM_NAME
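
# Example (a sketch, not from this project's config): overriding the configured
# model at init time. The repo id below is illustrative only.
#
#   client, name = init_hf_llm("mistralai/Mistral-7B-Instruct-v0.3")
#   # name == "Mistral-7B-Instruct-v0.3", and CONFIG["hf_model"] is patched
#   # so the generation helpers below use the same model downstream.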

# ============================================================================
# HUGGINGFACE LLM RESPONSE GENERATION
# ============================================================================

def hf_generate_chat_response(prompt: str, hf_client_instance=None) -> Optional[str]:
    """Generate a chat response using HuggingFace InferenceClient.
    
    Args:
        prompt: The prompt to send to the model
        hf_client_instance: Optional HF client instance, uses global if not provided
    
    Returns:
        Generated response string or None if failed
    """
    client = hf_client_instance or hf_client
    
    if client is None:
        print("❌ HF client not available")
        return None
    
    try:
        print(f"🧠 Generating response with {LLM_NAME}...")
        response = client.chat_completion(
            messages=[{"role": "user", "content": prompt}],
            model=CONFIG["hf_model"],
            max_tokens=CONFIG["max_tokens"],
            temperature=CONFIG["temperature"]
        )
        result = response.choices[0].message.content
        if result:
            result = result.strip()
            print(f"βœ“ LLM response: {result[:100]}...")
            return result
        else:
            print(f"⚠️ Empty LLM response")
            return None
    except Exception as hf_error:
        print(f"❌ HF chat_completion error: {type(hf_error).__name__}: {str(hf_error)}")
        return None

def hf_generate_text_response(prompt: str, context: str, hf_client_instance=None) -> str:
    """Generate a text response using HuggingFace text_generation API.
    
    Used as fallback for simpler generation tasks.
    
    Args:
        prompt: The full prompt to send
        context: Document context for fallback response
        hf_client_instance: Optional HF client instance
    
    Returns:
        Generated response string
    """
    client = hf_client_instance or hf_client
    
    if client is None:
        print("❌ HF client not available")
        return f"I found relevant information in your documents:\n\n{context[:CONFIG['search_content_limit']]}..."
    
    try:
        print(f"Generating response with {LLM_NAME}...")
        response = client.text_generation(
            prompt,
            model=CONFIG["hf_model"],
            max_new_tokens=CONFIG["max_tokens"],
            temperature=CONFIG["temperature_fallback"],
            return_full_text=False
        )
        print(f"βœ“ Success! Response generated.")
        return response
    except Exception as hf_error:
        print(f"❌ HF API error: {str(hf_error)}")
        return f"I found relevant information in your documents:\n\n{context[:CONFIG['search_content_limit']]}..."

def get_hf_client():
    """Get the HF client instance."""
    return hf_client

def get_hf_llm_name():
    """Get the HF LLM name."""
    return LLM_NAME
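
# ============================================================================
# MANUAL SMOKE TEST
# ============================================================================
# A minimal sketch for exercising this module by hand, assuming the process
# runs on an HF Space (IS_HF_SPACE is truthy), the HF_token environment
# variable holds a valid token, and CONFIG defines hf_model, max_tokens,
# temperature, temperature_fallback, and search_content_limit. The prompt and
# context strings below are placeholders, not part of the project.

if __name__ == "__main__":
    client, name = init_hf_llm()
    if client is None:
        print("Client unavailable; check IS_HF_SPACE and the HF_token env var.")
    else:
        # Primary path: chat completion.
        answer = hf_generate_chat_response("Say hello in one sentence.")
        print(answer)
        # Fallback path: plain text generation with a stand-in context string.
        fallback = hf_generate_text_response(
            "Say hello in one sentence.",
            context="(example document context)",
        )
        print(fallback)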