"""
HuggingFace Space specific helpers for LLM operations.
Contains: HF InferenceClient initialization and response generation.
"""
import os
from typing import Optional
from huggingface_hub import InferenceClient
from helpers_SHARED import CONFIG, IS_HF_SPACE
# ============================================================================
# HUGGINGFACE LLM INITIALIZATION
# ============================================================================
hf_client = None
LLM_NAME = None

def init_hf_llm(model_name=None):
    """Initialize the HuggingFace InferenceClient for the cloud LLM. Optionally specify model_name."""
    global hf_client, LLM_NAME
    if not IS_HF_SPACE:
        print("ℹ️ Not running on HF Space, skipping HF client initialization")
        return None, None
    # Use the provided model_name or fall back to the configured model
    if model_name is not None:
        LLM_NAME = model_name.split("/")[-1]
        # Patch CONFIG so downstream callers use the same model
        CONFIG["hf_model"] = model_name
    else:
        LLM_NAME = CONFIG["hf_model"].split("/")[-1]
    try:
        hf_client = InferenceClient(token=os.getenv("HF_token"))
        print(f"✅ HuggingFace InferenceClient initialized successfully for {LLM_NAME}")
        return hf_client, LLM_NAME
    except Exception as e:
        print(f"⚠️ Warning: HuggingFace InferenceClient not available: {e}")
        hf_client = None
        return None, LLM_NAME

# ============================================================================
# HUGGINGFACE LLM RESPONSE GENERATION
# ============================================================================

def hf_generate_chat_response(prompt: str, hf_client_instance=None) -> Optional[str]:
    """Generate a chat response using the HuggingFace InferenceClient.

    Args:
        prompt: The prompt to send to the model
        hf_client_instance: Optional HF client instance, uses global if not provided

    Returns:
        Generated response string or None if failed
    """
    client = hf_client_instance or hf_client
    if client is None:
        print("❌ HF client not available")
        return None
    try:
        print(f"🧠 Generating response with {LLM_NAME}...")
        response = client.chat_completion(
            messages=[{"role": "user", "content": prompt}],
            model=CONFIG["hf_model"],
            max_tokens=CONFIG["max_tokens"],
            temperature=CONFIG["temperature"]
        )
        result = response.choices[0].message.content
        if result:
            result = result.strip()
            print(f"✅ LLM response: {result[:100]}...")
            return result
        else:
            print("⚠️ Empty LLM response")
            return None
    except Exception as hf_error:
        print(f"❌ HF chat_completion error: {type(hf_error).__name__}: {str(hf_error)}")
        return None

def hf_generate_text_response(prompt: str, context: str, hf_client_instance=None) -> str:
    """Generate a text response using the HuggingFace text_generation API.

    Used as a fallback for simpler generation tasks.

    Args:
        prompt: The full prompt to send
        context: Document context for the fallback response
        hf_client_instance: Optional HF client instance

    Returns:
        Generated response string
    """
    client = hf_client_instance or hf_client
    if client is None:
        print("❌ HF client not available")
        return f"I found relevant information in your documents:\n\n{context[:CONFIG['search_content_limit']]}..."
    try:
        print(f"Generating response with {LLM_NAME}...")
        response = client.text_generation(
            prompt,
            model=CONFIG["hf_model"],
            max_new_tokens=CONFIG["max_tokens"],
            temperature=CONFIG["temperature_fallback"],
            return_full_text=False
        )
        print("✅ Success! Response generated.")
        return response
    except Exception as hf_error:
        print(f"❌ HF API error: {str(hf_error)}")
        return f"I found relevant information in your documents:\n\n{context[:CONFIG['search_content_limit']]}..."

def get_hf_client():
    """Get the HF client instance."""
    return hf_client


def get_hf_llm_name():
    """Get the HF LLM name."""
    return LLM_NAME
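

# ============================================================================
# USAGE SKETCH
# ============================================================================
# Minimal usage sketch, assuming the module runs on an HF Space with an
# "HF_token" secret set and that helpers_SHARED provides CONFIG / IS_HF_SPACE
# as imported above. Illustrative only; not part of the Space's runtime path.
if __name__ == "__main__":
    client, name = init_hf_llm()
    if client is not None:
        reply = hf_generate_chat_response("Summarize retrieval-augmented generation in one sentence.")
        print(reply or "No response generated.")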