""" HuggingFace Space specific helpers for LLM operations. Contains: HF InferenceClient initialization and response generation. """ import os from typing import Optional from huggingface_hub import InferenceClient from helpers_SHARED import CONFIG, IS_HF_SPACE # ============================================================================ # HUGGINGFACE LLM INITIALIZATION # ============================================================================ hf_client = None LLM_NAME = None def init_hf_llm(model_name=None): """Initialize HuggingFace InferenceClient for cloud LLM. Optionally specify model_name.""" global hf_client, LLM_NAME if not IS_HF_SPACE: print("ℹ️ Not running on HF Space, skipping HF client initialization") return None, None # Use provided model_name or fallback to config if model_name is not None: LLM_NAME = model_name.split("/")[-1] # Patch CONFIG for downstream use CONFIG["hf_model"] = model_name else: LLM_NAME = CONFIG["hf_model"].split("/")[-1] try: hf_client = InferenceClient(token=os.getenv("HF_token")) print(f"✓ HuggingFace InferenceClient initialized successfully for {LLM_NAME}") return hf_client, LLM_NAME except Exception as e: print(f"✗ Warning: HuggingFace InferenceClient not available: {e}") hf_client = None return None, LLM_NAME # ============================================================================ # HUGGINGFACE LLM RESPONSE GENERATION # ============================================================================ def hf_generate_chat_response(prompt: str, hf_client_instance=None) -> Optional[str]: """Generate a chat response using HuggingFace InferenceClient. Args: prompt: The prompt to send to the model hf_client_instance: Optional HF client instance, uses global if not provided Returns: Generated response string or None if failed """ client = hf_client_instance or hf_client if client is None: print("❌ HF client not available") return None try: print(f"🧠 Generating response with {LLM_NAME}...") response = client.chat_completion( messages=[{"role": "user", "content": prompt}], model=CONFIG["hf_model"], max_tokens=CONFIG["max_tokens"], temperature=CONFIG["temperature"] ) result = response.choices[0].message.content if result: result = result.strip() print(f"✓ LLM response: {result[:100]}...") return result else: print(f"⚠️ Empty LLM response") return None except Exception as hf_error: print(f"❌ HF chat_completion error: {type(hf_error).__name__}: {str(hf_error)}") return None def hf_generate_text_response(prompt: str, context: str, hf_client_instance=None) -> str: """Generate a text response using HuggingFace text_generation API. Used as fallback for simpler generation tasks. Args: prompt: The full prompt to send context: Document context for fallback response hf_client_instance: Optional HF client instance Returns: Generated response string """ client = hf_client_instance or hf_client if client is None: print("❌ HF client not available") return f"I found relevant information in your documents:\n\n{context[:CONFIG['search_content_limit']]}..." try: print(f"Generating response with {LLM_NAME}...") response = client.text_generation( prompt, model=CONFIG["hf_model"], max_new_tokens=CONFIG["max_tokens"], temperature=CONFIG["temperature_fallback"], return_full_text=False ) print(f"✓ Success! Response generated.") return response except Exception as hf_error: print(f"❌ HF API error: {str(hf_error)}") return f"I found relevant information in your documents:\n\n{context[:CONFIG['search_content_limit']]}..." 
def get_hf_client():
    """Get the HF client instance."""
    return hf_client


def get_hf_llm_name():
    """Get the HF LLM name."""
    return LLM_NAME
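
# Minimal usage sketch (an assumption for illustration: it presumes the code
# runs on an HF Space with an `HF_token` secret set and helpers_SHARED
# supplying CONFIG and IS_HF_SPACE; this guard is not in the original module):
#
#     client, name = init_hf_llm()          # or init_hf_llm("org/model-id")
#     answer = hf_generate_chat_response("Summarize this document.")
#
if __name__ == "__main__":
    client, name = init_hf_llm()
    if client is not None:
        print(hf_generate_chat_response("Hello!"))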