import os
import time
import random
from typing import List, Optional
from pathlib import Path
from dotenv import load_dotenv
import httpx
from openai import OpenAI
try:
	from huggingface_hub import InferenceClient
except ImportError:
	InferenceClient = None


def _chunk_list(items: List[str], chunk_size: int) -> List[List[str]]:
	"""Split items into consecutive chunks, e.g. _chunk_list(["a", "b", "c"], 2) -> [["a", "b"], ["c"]]"""
	return [items[i:i + chunk_size] for i in range(0, len(items), chunk_size)]


class NoProxyHTTPClient(httpx.Client):
	def __init__(self, *args, **kwargs):
		# Drop any 'proxies' kwarg to prevent incompatibility errors between httpx and SDK versions
		kwargs.pop("proxies", None)
		super().__init__(*args, **kwargs)


class OpenAIEmbeddingsWrapper:
	"""
	Minimal embeddings wrapper compatible with LangChain's embeddings interface.
	Uses the OpenAI Embeddings API via the official SDK (client.embeddings.create) with batching and retries.
	Forces a custom httpx.Client to avoid unexpected 'proxies' keyword errors.
	"""
	def __init__(self, model: str = "text-embedding-ada-002", api_key: str | None = None, timeout: float = 30.0):
		# Load .env from project root (one level up from backend/)
		project_root = Path(__file__).resolve().parents[1]
		load_dotenv(project_root / ".env")
		self.model = model
		self.api_key = api_key or os.getenv("OPENAI_API_KEY")
		if not self.api_key:
			raise ValueError("OPENAI_API_KEY is required for embeddings")
		# Timeout/backoff config
		self.timeout = timeout
		self.batch_size = int(os.getenv("OPENAI_EMBED_BATCH_SIZE", "64"))
		self.max_retries = int(os.getenv("OPENAI_EMBED_MAX_RETRIES", "6"))
		self.initial_backoff = float(os.getenv("OPENAI_EMBED_INITIAL_BACKOFF", "1.0"))
		self.backoff_multiplier = float(os.getenv("OPENAI_EMBED_BACKOFF_MULTIPLIER", "2.0"))
		# Initialize the OpenAI SDK client with the custom http client; pass the API key
		# explicitly so a constructor argument takes precedence over any OPENAI_API_KEY
		# already in the environment (the base URL is still read from env if set)
		os.environ.setdefault("OPENAI_API_KEY", self.api_key)
		http_client = NoProxyHTTPClient(timeout=self.timeout)
		self.client = OpenAI(api_key=self.api_key, http_client=http_client)

	def _embed_once(self, inputs: List[str]) -> List[List[float]]:
		resp = self.client.embeddings.create(
			model=self.model,
			input=inputs,
			encoding_format="float",
		)
		return [item.embedding for item in resp.data]

	def _embed_with_retries(self, inputs: List[str]) -> List[List[float]]:
		"""Retry transient failures (429/5xx or unknown status) with exponential backoff, jitter, and Retry-After support"""
		attempt = 0
		backoff = self.initial_backoff
		while True:
			try:
				return self._embed_once(inputs)
			except Exception as err:
				status = None
				try:
					status = getattr(getattr(err, "response", None), "status_code", None)
				except Exception:
					status = None
				if (status in (429, 500, 502, 503, 504) or status is None) and attempt < self.max_retries:
					retry_after = 0.0
					try:
						retry_after = float(getattr(getattr(err, "response", None), "headers", {}).get("Retry-After", 0))
					except Exception:
						retry_after = 0.0
					jitter = random.uniform(0, 0.5)
					sleep_s = max(retry_after, backoff) + jitter
					time.sleep(sleep_s)
					attempt += 1
					backoff *= self.backoff_multiplier
					continue
				raise
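
	# With the defaults above (initial backoff 1.0s, multiplier 2.0, 6 retries),
	# the sleep schedule is roughly 1, 2, 4, 8, 16, 32 seconds plus up to 0.5s
	# of jitter per attempt, after which the last error is re-raised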

	def _embed(self, inputs: List[str]) -> List[List[float]]:
		"""Embed inputs in batches, pausing briefly between batches to avoid rate limits"""
		all_embeddings: List[List[float]] = []
		for batch in _chunk_list(inputs, self.batch_size):
			embeds = self._embed_with_retries(batch)
			all_embeddings.extend(embeds)
			time.sleep(float(os.getenv("OPENAI_EMBED_INTER_BATCH_DELAY", "0.2")))
		return all_embeddings

	def embed_query(self, text: str) -> List[float]:
		return self._embed([text])[0]
	
	def embed_documents(self, texts: List[str]) -> List[List[float]]:
		return self._embed(texts)
	
	def __call__(self, text: str) -> List[float]:
		"""
		Make the embeddings wrapper callable for compatibility with FAISS.
		When FAISS calls the embeddings object directly, this delegates to embed_query.
		"""
		return self.embed_query(text)
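
# A hedged usage sketch of this wrapper with LangChain's FAISS vector store
# (assumes the langchain-community package is installed; the texts are illustrative):
#
#	from langchain_community.vectorstores import FAISS
#	store = FAISS.from_texts(["hello", "world"], OpenAIEmbeddingsWrapper())
#	hits = store.similarity_search("greeting", k=1)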


class HuggingFaceEmbeddingsWrapper:
	"""
	Embeddings wrapper compatible with LangChain's embeddings interface.
	Uses HuggingFace InferenceClient with Nebius provider for embeddings.
	Implements same interface as OpenAIEmbeddingsWrapper for drop-in replacement.
	"""
	def __init__(self, model: str = "Qwen/Qwen3-Embedding-8B", api_key: str | None = None, timeout: float = 60.0):
		if InferenceClient is None:
			raise ImportError("huggingface_hub is required for HuggingFace embeddings. Install it with: pip install huggingface_hub")
		
		# Load .env from project root (one level up from backend/)
		project_root = Path(__file__).resolve().parents[1]
		load_dotenv(project_root / ".env")
		
		self.model = model or os.getenv("HF_EMBEDDING_MODEL", "Qwen/Qwen3-Embedding-8B")
		self.api_key = api_key or os.getenv("HF_TOKEN")
		if not self.api_key:
			raise ValueError("HF_TOKEN is required for HuggingFace embeddings. Set HF_TOKEN environment variable.")
		
		# Timeout/backoff config
		self.timeout = timeout
		self.batch_size = int(os.getenv("HF_EMBED_BATCH_SIZE", "32"))  # Smaller batch size for HF
		self.max_retries = int(os.getenv("HF_EMBED_MAX_RETRIES", "6"))
		self.initial_backoff = float(os.getenv("HF_EMBED_INITIAL_BACKOFF", "1.0"))
		self.backoff_multiplier = float(os.getenv("HF_EMBED_BACKOFF_MULTIPLIER", "2.0"))
		
		# Initialize HuggingFace InferenceClient with the Nebius provider and the configured timeout
		self.client = InferenceClient(
			provider="nebius",
			api_key=self.api_key,
			timeout=self.timeout,
		)
		print(f"[HF Embeddings] Initialized with model: {self.model}, provider: nebius")

	def _embed_once(self, inputs: List[str]) -> List[List[float]]:
		"""Call the HuggingFace feature_extraction API for a batch of texts"""
		import numpy as np  # local import so numpy stays optional for OpenAI-only use
		
		# The endpoint may accept batches, but texts are sent one at a time for
		# reliability; each result is normalized to a flat list of floats
		embeddings: List[List[float]] = []
		for text in inputs:
			result = self.client.feature_extraction(text, model=self.model)
			if isinstance(result, np.ndarray):
				# A 2D array holds one row per input; a 1D array is the vector itself
				result = result[0].tolist() if result.ndim == 2 else result.tolist()
			embeddings.append(result)
		return embeddings

	def _embed_with_retries(self, inputs: List[str]) -> List[List[float]]:
		"""Embed with retry logic similar to OpenAI wrapper"""
		attempt = 0
		backoff = self.initial_backoff
		while True:
			try:
				return self._embed_once(inputs)
			except Exception as err:
				status = None
				try:
					# Try to extract status code from error if available
					status = getattr(getattr(err, "response", None), "status_code", None)
				except Exception:
					status = None
				
				if (status in (429, 500, 502, 503, 504) or status is None) and attempt < self.max_retries:
					retry_after = 0.0
					try:
						retry_after = float(getattr(getattr(err, "response", None), "headers", {}).get("Retry-After", 0))
					except Exception:
						retry_after = 0.0
					jitter = random.uniform(0, 0.5)
					sleep_s = max(retry_after, backoff) + jitter
					time.sleep(sleep_s)
					attempt += 1
					backoff *= self.backoff_multiplier
					continue
				raise

	def _embed(self, inputs: List[str]) -> List[List[float]]:
		"""Process embeddings in batches with delays between batches"""
		all_embeddings: List[List[float]] = []
		for batch in _chunk_list(inputs, self.batch_size):
			embeds = self._embed_with_retries(batch)
			all_embeddings.extend(embeds)
			# Small delay between batches to avoid rate limiting
			time.sleep(float(os.getenv("HF_EMBED_INTER_BATCH_DELAY", "0.2")))
		return all_embeddings

	def embed_query(self, text: str) -> List[float]:
		"""Embed a single query text"""
		return self._embed([text])[0]
	
	def embed_documents(self, texts: List[str]) -> List[List[float]]:
		"""Embed multiple documents"""
		return self._embed(texts)
	
	def __call__(self, text: str) -> List[float]:
		"""
		Make the embeddings wrapper callable for compatibility with FAISS.
		When FAISS calls the embeddings object directly, this delegates to embed_query.
		"""
		return self.embed_query(text)


def get_embeddings_wrapper(
	model: Optional[str] = None,
	api_key: Optional[str] = None,
	timeout: float = 30.0
):
	"""
	Factory function to get the appropriate embeddings wrapper based on configuration.
	
	Args:
		model: Model name (provider-specific)
		api_key: API key (provider-specific)
		timeout: Timeout in seconds
	
	Returns:
		Either OpenAIEmbeddingsWrapper or HuggingFaceEmbeddingsWrapper instance
	
	Environment Variables:
		EMBEDDINGS_PROVIDER: "huggingface", "hf", or "nebius" (default: "hf"), or "openai" for the OpenAI fallback
		HF_TOKEN: Required when using the HuggingFace provider
		HF_EMBEDDING_MODEL: Optional model override for HuggingFace (default: "Qwen/Qwen3-Embedding-8B")
	"""
	# Load .env from project root
	project_root = Path(__file__).resolve().parents[1]
	load_dotenv(project_root / ".env")
	
	provider = os.getenv("EMBEDDINGS_PROVIDER", "hf").lower()
	
	if provider in ["huggingface", "hf", "nebius"]:
		print("[Embeddings Factory] Using HuggingFace/Nebius provider")
		hf_model = model or os.getenv("HF_EMBEDDING_MODEL", "Qwen/Qwen3-Embedding-8B")
		return HuggingFaceEmbeddingsWrapper(model=hf_model, api_key=api_key, timeout=timeout)
	else:
		print("[Embeddings Factory] Using OpenAI provider (fallback)")
		openai_model = model or os.getenv("OPENAI_EMBEDDING_MODEL", "text-embedding-ada-002")
		return OpenAIEmbeddingsWrapper(model=openai_model, api_key=api_key, timeout=timeout)
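
# Example .env configuration read by the factory (placeholder values, not real keys):
#	EMBEDDINGS_PROVIDER=hf
#	HF_TOKEN=hf_xxxxxxxxxxxx
#	HF_EMBEDDING_MODEL=Qwen/Qwen3-Embedding-8B
# or, for the OpenAI fallback:
#	EMBEDDINGS_PROVIDER=openai
#	OPENAI_API_KEY=sk-xxxxxxxxxxxx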

# Explanation of the Personal Status Law system
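

# A minimal, hedged smoke test: runs only when the module is executed directly,
# assumes a valid OPENAI_API_KEY or HF_TOKEN is configured, and uses made-up texts
if __name__ == "__main__":
	embedder = get_embeddings_wrapper()
	query_vec = embedder.embed_query("What does the Personal Status Law say about custody?")
	doc_vecs = embedder.embed_documents(["Article 1: scope of the law.", "Article 2: definitions."])
	print(f"query dim={len(query_vec)}, embedded {len(doc_vecs)} documents")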