"""
HuggingFace Space specific helpers for LLM operations.
Contains: HF InferenceClient initialization and response generation.
"""
import os
from typing import Optional
from huggingface_hub import InferenceClient
from helpers_SHARED import CONFIG, IS_HF_SPACE
# ============================================================================
# HUGGINGFACE LLM INITIALIZATION
# ============================================================================
hf_client = None
LLM_NAME = None

def init_hf_llm(model_name=None):
    """Initialize the HuggingFace InferenceClient for the cloud LLM. Optionally specify model_name."""
    global hf_client, LLM_NAME
    if not IS_HF_SPACE:
        print("ℹ️ Not running on HF Space, skipping HF client initialization")
        return None, None
    # Use the provided model_name or fall back to the configured model
    if model_name is not None:
        LLM_NAME = model_name.split("/")[-1]
        # Patch CONFIG so downstream callers use the same model
        CONFIG["hf_model"] = model_name
    else:
        LLM_NAME = CONFIG["hf_model"].split("/")[-1]
    try:
        hf_client = InferenceClient(token=os.getenv("HF_token"))
        print(f"✅ HuggingFace InferenceClient initialized successfully for {LLM_NAME}")
        return hf_client, LLM_NAME
    except Exception as e:
        print(f"⚠️ Warning: HuggingFace InferenceClient not available: {e}")
        hf_client = None
        return None, LLM_NAME

# ============================================================================
# HUGGINGFACE LLM RESPONSE GENERATION
# ============================================================================

def hf_generate_chat_response(prompt: str, hf_client_instance=None) -> Optional[str]:
    """Generate a chat response using the HuggingFace InferenceClient.

    Args:
        prompt: The prompt to send to the model
        hf_client_instance: Optional HF client instance, uses global if not provided

    Returns:
        Generated response string or None if failed
    """
    client = hf_client_instance or hf_client
    if client is None:
        print("❌ HF client not available")
        return None
    try:
        print(f"🧠 Generating response with {LLM_NAME}...")
        response = client.chat_completion(
            messages=[{"role": "user", "content": prompt}],
            model=CONFIG["hf_model"],
            max_tokens=CONFIG["max_tokens"],
            temperature=CONFIG["temperature"]
        )
        result = response.choices[0].message.content
        if result:
            result = result.strip()
            print(f"✅ LLM response: {result[:100]}...")
            return result
        else:
            print("⚠️ Empty LLM response")
            return None
    except Exception as hf_error:
        print(f"❌ HF chat_completion error: {type(hf_error).__name__}: {str(hf_error)}")
        return None

def hf_generate_text_response(prompt: str, context: str, hf_client_instance=None) -> str:
    """Generate a text response using the HuggingFace text_generation API.

    Used as a fallback for simpler generation tasks.

    Args:
        prompt: The full prompt to send
        context: Document context for the fallback response
        hf_client_instance: Optional HF client instance

    Returns:
        Generated response string
    """
    client = hf_client_instance or hf_client
    if client is None:
        print("❌ HF client not available")
        return f"I found relevant information in your documents:\n\n{context[:CONFIG['search_content_limit']]}..."
    try:
        print(f"Generating response with {LLM_NAME}...")
        response = client.text_generation(
            prompt,
            model=CONFIG["hf_model"],
            max_new_tokens=CONFIG["max_tokens"],
            temperature=CONFIG["temperature_fallback"],
            return_full_text=False
        )
        print("✅ Success! Response generated.")
        return response
    except Exception as hf_error:
        print(f"❌ HF API error: {str(hf_error)}")
        return f"I found relevant information in your documents:\n\n{context[:CONFIG['search_content_limit']]}..."

def get_hf_client():
    """Get the HF client instance."""
    return hf_client


def get_hf_llm_name():
    """Get the HF LLM name."""
    return LLM_NAME
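

# ============================================================================
# USAGE SKETCH
# ============================================================================
# Minimal usage sketch, assuming the module runs on an HF Space with an
# "HF_token" secret set and that helpers_SHARED provides CONFIG / IS_HF_SPACE
# as imported above. Illustrative only; not part of the Space's runtime path.
if __name__ == "__main__":
    client, name = init_hf_llm()
    if client is not None:
        reply = hf_generate_chat_response("Summarize retrieval-augmented generation in one sentence.")
        print(reply or "No response generated.")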