Spaces:
Sleeping
Sleeping
| """ | |
| HuggingFace Space specific helpers for LLM operations. | |
| Contains: HF InferenceClient initialization and response generation. | |
| """ | |
| import os | |
| from typing import Optional | |
| from huggingface_hub import InferenceClient | |
| from helpers_SHARED import CONFIG, IS_HF_SPACE | |
# ============================================================================
# HUGGINGFACE LLM INITIALIZATION
# ============================================================================
# Module-level singletons: populated by init_hf_llm(), read back via
# get_hf_client() / get_hf_llm_name() and by the generation helpers below.
hf_client = None  # huggingface_hub.InferenceClient once initialized, else None
LLM_NAME = None   # short model name (text after the last "/"), else None
def init_hf_llm(model_name=None):
    """Initialize the module-level HuggingFace InferenceClient.

    Args:
        model_name: Optional full model id (e.g. "org/model"); when given it
            overwrites CONFIG["hf_model"] so downstream calls use it too.

    Returns:
        A (client, llm_name) tuple. Both are None when not running on an HF
        Space; the client is None when initialization fails.
    """
    global hf_client, LLM_NAME

    # Cloud client only makes sense inside an HF Space deployment.
    if not IS_HF_SPACE:
        print("βΉοΈ Not running on HF Space, skipping HF client initialization")
        return None, None

    # Resolve which model id to use, patching CONFIG when an override is given.
    selected_model = model_name if model_name is not None else CONFIG["hf_model"]
    if model_name is not None:
        CONFIG["hf_model"] = model_name
    LLM_NAME = selected_model.split("/")[-1]

    try:
        # NOTE(review): env var is read as "HF_token" (mixed case) — confirm
        # this matches the Space's configured secret name.
        hf_client = InferenceClient(token=os.getenv("HF_token"))
    except Exception as e:
        print(f"β Warning: HuggingFace InferenceClient not available: {e}")
        hf_client = None
        return None, LLM_NAME

    print(f"β HuggingFace InferenceClient initialized successfully for {LLM_NAME}")
    return hf_client, LLM_NAME
| # ============================================================================ | |
| # HUGGINGFACE LLM RESPONSE GENERATION | |
| # ============================================================================ | |
def hf_generate_chat_response(prompt: str, hf_client_instance=None) -> Optional[str]:
    """Generate a reply via the HF chat-completion API.

    Args:
        prompt: Text forwarded to the model as a single user message.
        hf_client_instance: Explicit client to use; falls back to the
            module-level client when not provided.

    Returns:
        The stripped model reply, or None when the client is missing, the
        reply is empty, or the API call fails.
    """
    active_client = hf_client_instance or hf_client
    if active_client is None:
        print("β HF client not available")
        return None

    try:
        print(f"π§ Generating response with {LLM_NAME}...")
        # Single-turn request; generation knobs come from shared CONFIG.
        reply = active_client.chat_completion(
            messages=[{"role": "user", "content": prompt}],
            model=CONFIG["hf_model"],
            max_tokens=CONFIG["max_tokens"],
            temperature=CONFIG["temperature"]
        ).choices[0].message.content

        if not reply:
            print("β οΈ Empty LLM response")
            return None

        reply = reply.strip()
        print(f"β LLM response: {reply[:100]}...")
        return reply
    except Exception as err:
        print(f"β HF chat_completion error: {type(err).__name__}: {str(err)}")
        return None
def hf_generate_text_response(prompt: str, context: str, hf_client_instance=None) -> str:
    """Generate a reply via the HF text_generation API (simpler fallback path).

    Args:
        prompt: Full prompt sent verbatim to the model.
        context: Document text used to build a fallback answer when the
            client is unavailable or the API call fails.
        hf_client_instance: Explicit client; falls back to the module global.

    Returns:
        The generated text, or a truncated-context fallback string on failure.
    """
    def _context_fallback() -> str:
        # Degrade gracefully: surface the retrieved context instead of failing.
        return f"I found relevant information in your documents:\n\n{context[:CONFIG['search_content_limit']]}..."

    active_client = hf_client_instance or hf_client
    if active_client is None:
        print("β HF client not available")
        return _context_fallback()

    try:
        print(f"Generating response with {LLM_NAME}...")
        generated = active_client.text_generation(
            prompt,
            model=CONFIG["hf_model"],
            max_new_tokens=CONFIG["max_tokens"],
            temperature=CONFIG["temperature_fallback"],
            return_full_text=False
        )
    except Exception as err:
        print(f"β HF API error: {str(err)}")
        return _context_fallback()

    print("β Success! Response generated.")
    return generated
def get_hf_client():
    """Return the module-level InferenceClient (None until init_hf_llm() succeeds)."""
    client = hf_client
    return client
def get_hf_llm_name():
    """Return the short LLM name set by init_hf_llm() (None before initialization)."""
    name = LLM_NAME
    return name