# Agentic_Rag4_dep_space/helpers_HF.py
"""
HuggingFace Space specific helpers for LLM operations.
Contains: HF InferenceClient initialization and response generation.
"""
import os
from typing import Optional
from huggingface_hub import InferenceClient
from helpers_SHARED import CONFIG, IS_HF_SPACE
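
# CONFIG (imported from helpers_SHARED) is expected to provide at least the
# keys used below: "hf_model", "max_tokens", "temperature",
# "temperature_fallback", and "search_content_limit".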

# ============================================================================
# HUGGINGFACE LLM INITIALIZATION
# ============================================================================
hf_client = None
LLM_NAME = None

def init_hf_llm(model_name=None):
    """Initialize the HuggingFace InferenceClient for the cloud LLM.

    Optionally specify model_name; otherwise CONFIG["hf_model"] is used.
    Returns a (client, llm_name) tuple; both may be None on failure.
    """
    global hf_client, LLM_NAME
    if not IS_HF_SPACE:
        print("ℹ️ Not running on HF Space, skipping HF client initialization")
        return None, None
    # Use the provided model_name or fall back to the configured default
    if model_name is not None:
        LLM_NAME = model_name.split("/")[-1]
        # Patch CONFIG so downstream calls use the same model
        CONFIG["hf_model"] = model_name
    else:
        LLM_NAME = CONFIG["hf_model"].split("/")[-1]
    try:
        hf_client = InferenceClient(token=os.getenv("HF_token"))
        print(f"✓ HuggingFace InferenceClient initialized successfully for {LLM_NAME}")
        return hf_client, LLM_NAME
    except Exception as e:
        print(f"✗ Warning: HuggingFace InferenceClient not available: {e}")
        hf_client = None
        return None, LLM_NAME
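
# Example usage (a sketch; the repo id below is illustrative, not the
# project's configured default):
#
#     client, name = init_hf_llm()  # uses CONFIG["hf_model"]
#     client, name = init_hf_llm("mistralai/Mistral-7B-Instruct-v0.2")  # override
#     if client is None:
#         ...  # not on a Space or token missing: fall back or surface an error
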
# ============================================================================
# HUGGINGFACE LLM RESPONSE GENERATION
# ============================================================================
def hf_generate_chat_response(prompt: str, hf_client_instance=None) -> Optional[str]:
    """Generate a chat response using the HuggingFace InferenceClient.

    Args:
        prompt: The prompt to send to the model
        hf_client_instance: Optional HF client instance; uses the global client if not provided

    Returns:
        Generated response string, or None on failure
    """
    client = hf_client_instance or hf_client
    if client is None:
        print("❌ HF client not available")
        return None
    try:
        print(f"🧠 Generating response with {LLM_NAME}...")
        response = client.chat_completion(
            messages=[{"role": "user", "content": prompt}],
            model=CONFIG["hf_model"],
            max_tokens=CONFIG["max_tokens"],
            temperature=CONFIG["temperature"]
        )
        result = response.choices[0].message.content
        if result:
            result = result.strip()
            print(f"✓ LLM response: {result[:100]}...")
            return result
        else:
            print("⚠️ Empty LLM response")
            return None
    except Exception as hf_error:
        print(f"❌ HF chat_completion error: {type(hf_error).__name__}: {hf_error}")
        return None
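
# Example usage (a sketch; the question string is illustrative only):
#
#     answer = hf_generate_chat_response("Summarize the uploaded PDF in one sentence.")
#     if answer is None:
#         ...  # caller should fall back, e.g. to hf_generate_text_response()
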
def hf_generate_text_response(prompt: str, context: str, hf_client_instance=None) -> str:
    """Generate a text response using the HuggingFace text_generation API.

    Used as a fallback for simpler generation tasks.

    Args:
        prompt: The full prompt to send
        context: Document context used to build a fallback response
        hf_client_instance: Optional HF client instance; uses the global client if not provided

    Returns:
        Generated response string, or an excerpt of the context on failure
    """
    client = hf_client_instance or hf_client
    if client is None:
        print("❌ HF client not available")
        return f"I found relevant information in your documents:\n\n{context[:CONFIG['search_content_limit']]}..."
    try:
        print(f"Generating response with {LLM_NAME}...")
        response = client.text_generation(
            prompt,
            model=CONFIG["hf_model"],
            max_new_tokens=CONFIG["max_tokens"],
            temperature=CONFIG["temperature_fallback"],
            return_full_text=False
        )
        print("✓ Success! Response generated.")
        return response
    except Exception as hf_error:
        print(f"❌ HF API error: {hf_error}")
        return f"I found relevant information in your documents:\n\n{context[:CONFIG['search_content_limit']]}..."
def get_hf_client():
    """Get the global HF client instance."""
    return hf_client


def get_hf_llm_name():
    """Get the HF LLM name."""
    return LLM_NAME
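

# Minimal smoke test (a sketch, assuming the HF_token secret is set and
# CONFIG provides "hf_model", "max_tokens", and "temperature"; on a local
# machine IS_HF_SPACE is False, so init_hf_llm() returns (None, None) and
# the test is skipped):
if __name__ == "__main__":
    client, name = init_hf_llm()
    if client is not None:
        answer = hf_generate_chat_response("Reply with the single word: pong")
        print(f"Smoke test answer from {name}: {answer}")
    else:
        print("Skipping smoke test: HF client unavailable (not on a Space?)")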