# Agentic_Rag4_dep_space/helpers_HF.py
"""
HuggingFace Space specific helpers for LLM operations.
Contains: HF InferenceClient initialization and response generation.
"""
import os
from typing import Optional
from huggingface_hub import InferenceClient
from helpers_SHARED import CONFIG, IS_HF_SPACE
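
# CONFIG (imported from helpers_SHARED) is expected to provide at least the
# keys used below: "hf_model", "max_tokens", "temperature",
# "temperature_fallback", and "search_content_limit".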

# ============================================================================
# HUGGINGFACE LLM INITIALIZATION
# ============================================================================
hf_client = None
LLM_NAME = None

def init_hf_llm(model_name=None):
    """Initialize the HuggingFace InferenceClient for the cloud LLM.

    Optionally specify model_name; otherwise CONFIG["hf_model"] is used.
    Returns a (client, llm_name) tuple; both may be None on failure.
    """
    global hf_client, LLM_NAME
    if not IS_HF_SPACE:
        print("ℹ️ Not running on HF Space, skipping HF client initialization")
        return None, None
    # Use the provided model_name or fall back to the configured default
    if model_name is not None:
        LLM_NAME = model_name.split("/")[-1]
        # Patch CONFIG so downstream calls use the same model
        CONFIG["hf_model"] = model_name
    else:
        LLM_NAME = CONFIG["hf_model"].split("/")[-1]
    try:
        hf_client = InferenceClient(token=os.getenv("HF_token"))
        print(f"✓ HuggingFace InferenceClient initialized successfully for {LLM_NAME}")
        return hf_client, LLM_NAME
    except Exception as e:
        print(f"✗ Warning: HuggingFace InferenceClient not available: {e}")
        hf_client = None
        return None, LLM_NAME
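
# Example usage (a sketch; the repo id below is illustrative, not the
# project's configured default):
#
#     client, name = init_hf_llm()  # uses CONFIG["hf_model"]
#     client, name = init_hf_llm("mistralai/Mistral-7B-Instruct-v0.2")  # override
#     if client is None:
#         ...  # not on a Space or token missing: fall back or surface an error
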
# ============================================================================
# HUGGINGFACE LLM RESPONSE GENERATION
# ============================================================================
def hf_generate_chat_response(prompt: str, hf_client_instance=None) -> Optional[str]:
    """Generate a chat response using the HuggingFace InferenceClient.

    Args:
        prompt: The prompt to send to the model
        hf_client_instance: Optional HF client instance; uses the global client if not provided

    Returns:
        Generated response string, or None on failure
    """
    client = hf_client_instance or hf_client
    if client is None:
        print("❌ HF client not available")
        return None
    try:
        print(f"🧠 Generating response with {LLM_NAME}...")
        response = client.chat_completion(
            messages=[{"role": "user", "content": prompt}],
            model=CONFIG["hf_model"],
            max_tokens=CONFIG["max_tokens"],
            temperature=CONFIG["temperature"]
        )
        result = response.choices[0].message.content
        if result:
            result = result.strip()
            print(f"✓ LLM response: {result[:100]}...")
            return result
        else:
            print("⚠️ Empty LLM response")
            return None
    except Exception as hf_error:
        print(f"❌ HF chat_completion error: {type(hf_error).__name__}: {hf_error}")
        return None
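
# Example usage (a sketch; the question string is illustrative only):
#
#     answer = hf_generate_chat_response("Summarize the uploaded PDF in one sentence.")
#     if answer is None:
#         ...  # caller should fall back, e.g. to hf_generate_text_response()
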
def hf_generate_text_response(prompt: str, context: str, hf_client_instance=None) -> str:
    """Generate a text response using the HuggingFace text_generation API.

    Used as a fallback for simpler generation tasks.

    Args:
        prompt: The full prompt to send
        context: Document context used to build a fallback response
        hf_client_instance: Optional HF client instance; uses the global client if not provided

    Returns:
        Generated response string, or an excerpt of the context on failure
    """
    client = hf_client_instance or hf_client
    if client is None:
        print("❌ HF client not available")
        return f"I found relevant information in your documents:\n\n{context[:CONFIG['search_content_limit']]}..."
    try:
        print(f"Generating response with {LLM_NAME}...")
        response = client.text_generation(
            prompt,
            model=CONFIG["hf_model"],
            max_new_tokens=CONFIG["max_tokens"],
            temperature=CONFIG["temperature_fallback"],
            return_full_text=False
        )
        print("✓ Success! Response generated.")
        return response
    except Exception as hf_error:
        print(f"❌ HF API error: {hf_error}")
        return f"I found relevant information in your documents:\n\n{context[:CONFIG['search_content_limit']]}..."
def get_hf_client():
    """Get the global HF client instance."""
    return hf_client


def get_hf_llm_name():
    """Get the HF LLM name."""
    return LLM_NAME
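

# Minimal smoke test (a sketch, assuming the HF_token secret is set and
# CONFIG provides "hf_model", "max_tokens", and "temperature"; on a local
# machine IS_HF_SPACE is False, so init_hf_llm() returns (None, None) and
# the test is skipped):
if __name__ == "__main__":
    client, name = init_hf_llm()
    if client is not None:
        answer = hf_generate_chat_response("Reply with the single word: pong")
        print(f"Smoke test answer from {name}: {answer}")
    else:
        print("Skipping smoke test: HF client unavailable (not on a Space?)")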