Commit 97cdc7c · Parent: 81872f1
pushing last changes ...

Files changed:
- .gitignore +3 -0
- backend/document_processor.py +1 -4
- backend/main.py +11 -12
- backend/rag_system.py +178 -60
- frontend/build/asset-manifest.json +3 -3
- frontend/build/index.html +1 -1
- frontend/build/static/css/main.3a0c885e.css.map +1 -1
- frontend/build/static/js/{main.882def61.js → main.2713a5e5.js} +0 -0
- frontend/build/static/js/{main.882def61.js.LICENSE.txt → main.2713a5e5.js.LICENSE.txt} +0 -0
- frontend/build/static/js/{main.882def61.js.map → main.2713a5e5.js.map} +0 -0
- test_nebius_embeddings.py +0 -292
.gitignore
CHANGED
@@ -25,6 +25,9 @@ vectorstore/
 *.pkl
 *.faiss
 
+# Chunk vectorstores (binary files, use HF Xet if needed)
+chunk_vectorstores/
+
 # Processed documents JSON - included in repository (as requested)
 # processed_documents.json
 
backend/document_processor.py
CHANGED
@@ -209,9 +209,7 @@ Return ONLY a single JSON object, with EXACTLY these two fields:
 
         if existing_filenames:
             print(f"Found {len(existing_filenames)} already processed documents")
-
-            print(f"Existing filenames (normalized): {list(existing_filenames_normalized)}")
-
+
         pdf_files = list(folder.glob("*.pdf"))
         new_processed_docs = []
         skipped_count = 0
@@ -221,7 +219,6 @@
             filename_normalized = self._normalize_filename(filename)
 
             # Debug: Print comparison attempt
-            print(f"[Filename Check] Checking: '{filename}' (normalized: '{filename_normalized}')")
 
             # Skip if already processed (using normalized comparison)
             if skip_existing and filename_normalized in existing_filenames_normalized:
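The skip-existing check that these prints surrounded compares normalized filenames against a pre-built set. The diff does not show _normalize_filename itself, so the following is only an assumption about what such a normalizer typically does (Unicode NFC + casefold + whitespace collapse), sketched to make the set-membership check concrete:

import unicodedata

def normalize_filename(name: str) -> str:
    # Hypothetical normalizer: Unicode NFC, casefold, collapse runs of whitespace.
    return " ".join(unicodedata.normalize("NFC", name).casefold().split())

existing = {normalize_filename(n) for n in ["ملف  قديم.PDF"]}
print(normalize_filename("ملف قديم.pdf") in existing)  # True: treated as already processed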
backend/main.py
CHANGED
@@ -60,16 +60,16 @@ async def lifespan(app: FastAPI):
 
     # Load environment variables from .env file with debug output
     env_path = PROJECT_ROOT / ".env"
-    print(f"[Lifespan] .env file path: {env_path}")
-    print(f"[Lifespan] .env file exists? {env_path.exists()}")
+    # print(f"[Lifespan] .env file path: {env_path}")
+    # print(f"[Lifespan] .env file exists? {env_path.exists()}")
 
    if env_path.exists():
        load_dotenv(env_path, override=True)
        api_key = os.getenv("OPENAI_API_KEY")
-        if api_key:
-            print(f"[Lifespan] OPENAI_API_KEY found (length: {len(api_key)} characters)")
-        else:
-            print("[Lifespan] WARNING: OPENAI_API_KEY not found in .env file")
+        # if api_key:
+        #     print(f"[Lifespan] OPENAI_API_KEY found (length: {len(api_key)} characters)")
+        # else:
+        #     print("[Lifespan] WARNING: OPENAI_API_KEY not found in .env file")
    else:
        print(f"[Lifespan] WARNING: .env file not found at {env_path}")
        # Try loading anyway in case it's in a different location
@@ -131,10 +131,10 @@ def initialize_rag_system():
 
    rag_system = RAGSystem()
 
-    print(f"[RAG Init] processed_documents.json path: {PROCESSED_JSON}")
-    print(f"[RAG Init] processed_documents.json exists? {PROCESSED_JSON.exists()}")
-    print(f"[RAG Init] documents folder path: {DOCUMENTS_DIR}")
-    print(f"[RAG Init] documents folder exists? {DOCUMENTS_DIR.exists()}")
+    # print(f"[RAG Init] processed_documents.json path: {PROCESSED_JSON}")
+    # print(f"[RAG Init] processed_documents.json exists? {PROCESSED_JSON.exists()}")
+    # print(f"[RAG Init] documents folder path: {DOCUMENTS_DIR}")
+    # print(f"[RAG Init] documents folder exists? {DOCUMENTS_DIR.exists()}")
 
    if DOCUMENTS_DIR.exists() and any(DOCUMENTS_DIR.glob("*.pdf")):
        print("[RAG Init] PDFs detected, processing...")
@@ -197,8 +197,7 @@ async def ask_question(request: QuestionRequest):
            model_provider=request.model_provider,
            context_mode=request.context_mode or "full",
        )
-
-        print(f"[Timing] Total /ask endpoint time: {request_time:.2f}ms")
+
        return QuestionResponse(answer=answer, sources=sources)
    except Exception as e:
        raise HTTPException(
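One detail worth keeping in mind now that the debug prints are commented out: load_dotenv(env_path, override=True) makes values from the .env file win over variables already exported in the shell, so os.getenv("OPENAI_API_KEY") reflects the file whenever it defines the key. A minimal check (the path is illustrative, not the project's layout):

import os
from pathlib import Path
from dotenv import load_dotenv

env_path = Path("/srv/app/.env")          # illustrative path
os.environ["OPENAI_API_KEY"] = "stale-shell-value"
if env_path.exists():
    load_dotenv(env_path, override=True)  # file value replaces the shell value
print(bool(os.getenv("OPENAI_API_KEY")))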
backend/rag_system.py
CHANGED
@@ -1,6 +1,9 @@
 import os
 import json
 import time
+import pickle
+import hashlib
+import re
 from pathlib import Path
 from typing import List, Tuple, Optional, Dict
 from langchain_community.vectorstores import FAISS
@@ -38,6 +41,16 @@ class RAGSystem:
         self.json_path = json_path
         self.vectorstore = None
 
+        # Chunk vectorstores directory path
+        if json_path is None:
+            project_root = Path(__file__).resolve().parents[1]
+            self.chunk_vectorstores_path = str(project_root / "chunk_vectorstores")
+        else:
+            project_root = Path(json_path).parent
+            self.chunk_vectorstores_path = str(project_root / "chunk_vectorstores")
+        # Create directory if it doesn't exist
+        os.makedirs(self.chunk_vectorstores_path, exist_ok=True)
+
         # Initialize embeddings (supports OpenAI or HuggingFace based on EMBEDDINGS_PROVIDER env var)
         provider = os.getenv("EMBEDDINGS_PROVIDER", "openai").lower()
         if provider in ["huggingface", "hf", "nebius"]:
@@ -51,6 +64,7 @@
         if not embeddings_api_key:
             raise ValueError("OpenAI API key is required. Set OPENAI_API_KEY environment variable.")
 
+        print(f"[RAGSystem] Using embeddings provider: {provider}")
         self.embeddings = get_embeddings_wrapper(api_key=embeddings_api_key)
 
         # Initialize document processor (always uses OpenAI for LLM processing)
@@ -87,22 +101,13 @@
                     embeddings=self.embeddings,
                     allow_dangerous_deserialization=True
                 )
-                # Ensure embedding function is properly set
-                # …
-                …
-                …
-                …
-                elif not callable(self.vectorstore.embedding_function):
-                    self.vectorstore.embedding_function = self.embeddings
-
-                # Also ensure the embeddings object itself is accessible and callable
-                # This handles cases where FAISS tries to call the embeddings object directly
-                if hasattr(self.vectorstore, 'embeddings'):
-                    self.vectorstore.embeddings = self.embeddings
-
-                # Verify embedding function is working
-                if not callable(self.vectorstore.embedding_function):
-                    raise ValueError("Embedding function is not callable after initialization")
+                # Ensure embedding function is properly set.
+                # LangChain now expects an Embeddings *object* here, not a raw function.
+                if not hasattr(self.vectorstore, "embedding_function") or self.vectorstore.embedding_function is None:
+                    self.vectorstore.embedding_function = self.embeddings
+                # Some versions may set a non-callable placeholder; override with our wrapper.
+                elif not callable(getattr(self.vectorstore.embedding_function, "embed_query", None)):
+                    self.vectorstore.embedding_function = self.embeddings
                 print(f"Loaded existing vectorstore from {self.vectorstore_path}")
             except Exception as e:
                 print(f"Could not load existing vectorstore: {e}")
@@ -286,6 +291,119 @@
 
         return None
 
+    def _sanitize_filename(self, filename: str) -> str:
+        """
+        Sanitize filename to create a safe directory name.
+        Handles Arabic filenames and special characters.
+
+        Args:
+            filename: Original filename
+
+        Returns:
+            Sanitized directory name safe for filesystem
+        """
+        # Remove extension
+        name_without_ext = Path(filename).stem
+
+        # Create a hash of the original filename for uniqueness
+        # This ensures Arabic and special characters are handled
+        filename_hash = hashlib.md5(filename.encode('utf-8')).hexdigest()[:8]
+
+        # Sanitize: keep alphanumeric, Arabic characters, spaces, hyphens, underscores
+        # Replace other special chars with underscore
+        sanitized = re.sub(r'[^\w\s\u0600-\u06FF\-]', '_', name_without_ext)
+        # Replace multiple spaces/underscores with single underscore
+        sanitized = re.sub(r'[\s_]+', '_', sanitized)
+        # Remove leading/trailing underscores
+        sanitized = sanitized.strip('_')
+
+        # Combine sanitized name with hash for uniqueness
+        if sanitized:
+            return f"{sanitized}_{filename_hash}"
+        else:
+            return filename_hash
+
+    def _get_chunk_vectorstore_path(self, filename: str) -> str:
+        """
+        Get the directory path for a document's chunk vectorstore.
+
+        Args:
+            filename: Document filename
+
+        Returns:
+            Path to the directory containing the chunk vectorstore
+        """
+        sanitized_name = self._sanitize_filename(filename)
+        return str(Path(self.chunk_vectorstores_path) / sanitized_name)
+
+    def _save_chunk_vectorstore(self, filename: str, vectorstore: FAISS, chunks: List[Document]) -> None:
+        """
+        Save chunk vectorstore and chunks metadata to disk.
+
+        Args:
+            filename: Document filename
+            vectorstore: FAISS vectorstore to save
+            chunks: List of Document objects (chunks metadata)
+        """
+        chunk_vs_path = self._get_chunk_vectorstore_path(filename)
+        os.makedirs(chunk_vs_path, exist_ok=True)
+
+        # Save FAISS vectorstore (saves index.faiss and index.pkl)
+        vectorstore.save_local(chunk_vs_path)
+
+        # Save chunks metadata as pickle
+        chunks_path = Path(chunk_vs_path) / "chunks.pkl"
+        with open(chunks_path, 'wb') as f:
+            pickle.dump(chunks, f)
+
+        print(f"[Chunk Vectorstore] Saved chunk vectorstore for '{filename}'")
+
+    def _load_chunk_vectorstore(self, filename: str) -> Optional[Tuple[FAISS, List[Document]]]:
+        """
+        Load chunk vectorstore and chunks metadata from disk if exists.
+
+        Args:
+            filename: Document filename
+
+        Returns:
+            Tuple of (FAISS vectorstore, List[Document]) if found, None otherwise
+        """
+        chunk_vs_path = self._get_chunk_vectorstore_path(filename)
+        chunk_vs_path_obj = Path(chunk_vs_path)
+
+        # Check if vectorstore files exist
+        faiss_index = chunk_vs_path_obj / "index.faiss"
+        faiss_pkl = chunk_vs_path_obj / "index.pkl"
+        chunks_pkl = chunk_vs_path_obj / "chunks.pkl"
+
+        if not (faiss_index.exists() and faiss_pkl.exists() and chunks_pkl.exists()):
+            return None
+
+        try:
+            # Load FAISS vectorstore
+            vectorstore = FAISS.load_local(
+                chunk_vs_path,
+                embeddings=self.embeddings,
+                allow_dangerous_deserialization=True
+            )
+
+            # Ensure embedding function is properly set to the embeddings wrapper object.
+            if not hasattr(vectorstore, "embedding_function") or vectorstore.embedding_function is None:
+                vectorstore.embedding_function = self.embeddings
+            elif not callable(getattr(vectorstore.embedding_function, "embed_query", None)):
+                vectorstore.embedding_function = self.embeddings
+
+            # Load chunks metadata
+            with open(chunks_pkl, 'rb') as f:
+                chunks = pickle.load(f)
+
+            print(f"[Chunk Vectorstore] Loaded chunk vectorstore for '{filename}'")
+            return vectorstore, chunks
+
+        except Exception as e:
+            print(f"[Chunk Vectorstore] Error loading chunk vectorstore for '{filename}': {e}")
+            return None
+
     def _get_or_build_chunk_vectorstore(
         self,
         filename: str,
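The md5 suffix in _sanitize_filename keeps directory names unique even when two different Arabic titles sanitize to the same slug, while the slug keeps the name readable. A standalone sketch of the same logic as in the hunk above (the example filename is invented; note that \w is already Unicode-aware in Python 3, so the explicit Arabic range \u0600-\u06FF is belt-and-braces):

import hashlib
import re
from pathlib import Path

def sanitize_filename(filename: str) -> str:
    # Slug from the stem: keep word chars, whitespace, the Arabic block, hyphens;
    # everything else becomes "_", then runs of spaces/underscores collapse to one.
    name = Path(filename).stem
    digest = hashlib.md5(filename.encode("utf-8")).hexdigest()[:8]
    slug = re.sub(r"[^\w\s\u0600-\u06FF\-]", "_", name)
    slug = re.sub(r"[\s_]+", "_", slug).strip("_")
    return f"{slug}_{digest}" if slug else digest

print(sanitize_filename("نظام الأحوال الشخصية (2022).pdf"))
# -> نظام_الأحوال_الشخصية_2022_ followed by 8 hex chars of the md5

Each resulting directory then holds the index.faiss and index.pkl written by FAISS.save_local plus the pickled chunks.pkl, exactly the trio _load_chunk_vectorstore probes for before attempting a load.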
@@ -294,7 +412,8 @@
         chunk_overlap: int = 300
     ) -> Tuple[FAISS, List[Document]]:
         """
-        Build or retrieve …
+        Build or retrieve a FAISS vectorstore of semantic chunks for a single document.
+        Checks disk first, then memory cache, then builds if needed.
 
         Args:
             filename: Document filename used as key in cache/metadata
@@ -305,11 +424,25 @@
         Returns:
             Tuple of (FAISS vectorstore over chunks, list of chunk Documents)
         """
-        # Return from cache if available
+        # Step 1: Return from memory cache if available (fastest)
         if filename in self._chunk_cache:
             entry = self._chunk_cache[filename]
             return entry["vectorstore"], entry["chunks"]  # type: ignore[return-value]
 
+        # Step 2: Try to load from disk
+        loaded = self._load_chunk_vectorstore(filename)
+        if loaded is not None:
+            vectorstore, chunks = loaded
+            # Cache in memory for faster access
+            self._chunk_cache[filename] = {
+                "vectorstore": vectorstore,
+                "chunks": chunks,
+            }
+            return vectorstore, chunks
+
+        # Step 3: Build new vectorstore (not found in cache or disk)
+        print(f"[Chunk Vectorstore] Building new chunk vectorstore for '{filename}'")
+
         # Create text splitter tuned for Arabic legal text
         text_splitter = RecursiveCharacterTextSplitter(
             chunk_size=chunk_size,
@@ -351,10 +484,16 @@
         ]
 
         chunk_vectorstore = FAISS.from_documents(chunk_docs, embedding=self.embeddings)
+
+        # Step 4: Save to disk for future use
+        self._save_chunk_vectorstore(filename, chunk_vectorstore, chunk_docs)
+
+        # Step 5: Cache in memory for current session
         self._chunk_cache[filename] = {
             "vectorstore": chunk_vectorstore,
             "chunks": chunk_docs,
         }
+
         return chunk_vectorstore, chunk_docs
 
     def _classify_question(self, question: str, use_history: bool = True, model_provider: str = "openai") -> Tuple[str, Optional[str], Optional[List[str]], Optional[List[str]]]:
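Steps 1-5 above form a read-through, write-through cache: memory first, then disk, then a fresh build that is written back to both tiers. The same shape in miniature, with a plain dict and pickle standing in for the FAISS store (illustrative only, not part of the commit; build is a placeholder callable):

import pickle
from pathlib import Path
from typing import Any, Callable, Dict

def get_or_build(key: str, cache: Dict[str, Any], cache_dir: Path,
                 build: Callable[[], Any]) -> Any:
    if key in cache:                                  # tier 1: memory
        return cache[key]
    path = cache_dir / f"{key}.pkl"
    if path.exists():                                 # tier 2: disk
        with open(path, "rb") as f:
            value = pickle.load(f)
        cache[key] = value
        return value
    value = build()                                   # tier 3: build, then write back
    cache_dir.mkdir(parents=True, exist_ok=True)
    with open(path, "wb") as f:
        pickle.dump(value, f)
    cache[key] = value
    return value

The write-back in steps 4 and 5 is what turns the first question against a document from a chunk-and-embed round trip into a cheap disk load on every later run.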
@@ -531,9 +670,9 @@ Respond with ONLY one of: "law-new", "law-followup", or provide an answer if it'
         if use_history:
             previous_document = self.chat_history.get_last_document()
 
-        # Build search query with last chat turn context
+        # Build search query with last chat turn context only for follow-up questions
         search_query = question
-        if use_history:
+        if use_history and label == "law-followup":
             last_turn = self.chat_history.get_last_turn()
             if last_turn:
                 # Format last turn as text
@@ -550,39 +689,44 @@
         # Perform similarity search with scores for relevance checking
         # Use k=3 to get multiple candidates for comparison
         similar_docs_with_scores = self.vectorstore.similarity_search_with_score(search_query, k=3)
-        search_time = (time.perf_counter() - search_start) * 1000
-        print(f"[Timing] Similarity search: {search_time:.2f}ms")
 
         if not similar_docs_with_scores:
             return "I couldn't find any relevant information to answer your question.", [], None
 
         # Extract best matching document and score
+        best_doc, best_score = similar_docs_with_scores[0]
+        print(f"[RAG] All document scores:")
+        for idx, (doc, score) in enumerate(similar_docs_with_scores, 1):
+            filename = doc.metadata.get("filename", "unknown")
+            print(f"  {idx}. {filename}: {score:.4f}")
+        print(f"[RAG] Best document: {best_score:.4f}")
+
         best_doc, best_score = similar_docs_with_scores[0]
         best_filename = best_doc.metadata.get("filename", "")
 
         # Step 2: Check if we should reuse previous document
+        matched_doc = best_doc
         matched_filename = best_filename
         if previous_document and use_history:
-            # Check if previous document is in the search results
-            previous_doc_found = False
+            # Check if previous document is in the search results and capture doc object
             previous_doc_score = None
+            previous_doc_obj = None
 
             for doc, score in similar_docs_with_scores:
                 filename = doc.metadata.get("filename", "")
                 if filename == previous_document:
-                    previous_doc_found = True
                     previous_doc_score = score
+                    previous_doc_obj = doc
                     break
 
-            if previous_doc_found:
+            if previous_doc_score is not None:
                 # Check if previous document score is close to best score
                 # FAISS returns distance scores (lower is better), so we compare the difference
                 score_difference = abs(best_score - previous_doc_score)
-                # If difference is small (within 0.15), reuse previous document
-                # This threshold can be adjusted based on testing
                 relevance_threshold = 0.15
 
                 if score_difference <= relevance_threshold:
+                    matched_doc = previous_doc_obj
                     matched_filename = previous_document
                     print(f"[RAG] Reusing previous document: {matched_filename} (score diff: {score_difference:.4f})")
                 else:
@@ -590,41 +734,16 @@
             else:
                 print(f"[RAG] Previous document not in top results, using best match: {best_filename}")
 
-        # Get the matched document object
-        matched_doc = None
-        for doc, _ in similar_docs_with_scores:
-            if doc.metadata.get("filename", "") == matched_filename:
-                matched_doc = doc
-                break
-
-        # If matched document not found in results (shouldn't happen), use best match
-        if matched_doc is None:
-            matched_doc = best_doc
-            matched_filename = best_filename
-
         # Print the filename and most similar summary
         print(f"[RAG] Matched filename: {matched_filename}")
 
-
-        if not matched_filename:
-            return "Error: No filename found in matched document metadata.", [], None
-
         # Step 3: Retrieve full text from JSON (with caching)
         retrieval_start = time.perf_counter()
         full_text = self._get_text_by_filename_cached(matched_filename)
-
-        print(f"[Timing] Text retrieval from JSON: {retrieval_time:.2f}ms")
+
 
         if not full_text:
-
-            docs = self._load_json_cached()
-            available_filenames = [doc.get("filename", "unknown") for doc in docs] if isinstance(docs, list) else []
-
-            error_msg = f"Could not retrieve text for document: '{matched_filename}'. "
-            if available_filenames:
-                error_msg += f"Available filenames in JSON: {', '.join(available_filenames)}"
-            else:
-                error_msg += "JSON file is empty or invalid."
+            error_msg = f"Could not retrieve text for document: '{matched_filename}'. Please ensure the document is properly processed."
             return error_msg, [matched_filename], None
 
 
@@ -655,14 +774,13 @@
             previous_doc = self.chat_history.get_last_document()
             if previous_chunks and previous_doc == matched_filename:
                 print(f"[RAG] Reusing previous chunks for law-followup question ({len(previous_chunks)} chunks)")
-                selected_chunks = previous_chunks
+                selected_chunks = previous_chunks
                 document_context_label = "Selected Document Excerpts"
                 chunk_texts: List[str] = []
                 for idx, chunk_text in enumerate(previous_chunks, start=1):
                     chunk_texts.append(f"[مقطع {idx}]\n{chunk_text}")
                 document_context = "\n\n".join(chunk_texts)[:25000]
             else:
-                previous_chunks = None  # Can't reuse, do new search
                 print(f"[RAG] Cannot reuse chunks: law-followup but different document or no previous chunks")
 
         # If not reusing previous chunks, do normal chunk search (for law-new or when reuse not possible)
@@ -748,8 +866,9 @@ MUST Answer the Question in Arabic."""
 
         # Add chat history (excluding the last user message if it's the current question)
         if history_messages:
-            # …
-            for msg in history_messages:
+            # Skip last message if it's the same as current question
+            skip_last = len(history_messages) > 0 and history_messages[-1].get("content") == question
+            for msg in history_messages[:-1] if skip_last else history_messages:
                 messages.append(msg)
 
         messages.append({"role": "user", "content": user_prompt})
@@ -796,7 +915,6 @@
         parse_start = time.perf_counter()
         answer = self._parse_llm_response(raw_response)
         parse_time = (time.perf_counter() - parse_start) * 1000
-        print(f"[Timing] Response parsing: {parse_time:.2f}ms")
 
         # Step 7: Update chat history with document source and chunks
         self.chat_history.add_message("user", question)
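Two of the smaller behavioural changes above are easy to check in isolation. First, the document-reuse rule: similarity_search_with_score returns FAISS distances (lower is closer), so the test compares a difference of distances against the 0.15 threshold. With hypothetical numbers:

best_score, previous_doc_score = 0.52, 0.61   # invented distances, lower = closer
print(abs(best_score - previous_doc_score) <= 0.15)
# True -> stay on the previous document for conversational continuity

Second, the history loop now makes explicit the exclusion its comment always promised, skipping the trailing user message when it duplicates the current question. Isolated as a helper (the function name is ours, not the commit's; messages are OpenAI-style dicts):

from typing import Dict, List

def history_without_current(history: List[Dict[str, str]], question: str) -> List[Dict[str, str]]:
    # Drop the last message only when it repeats the question being asked now,
    # so the model never sees the current question twice.
    skip_last = bool(history) and history[-1].get("content") == question
    return history[:-1] if skip_last else history

msgs = [{"role": "user", "content": "ما هي شروط الزواج؟"}]
assert history_without_current(msgs, "ما هي شروط الزواج؟") == []
assert history_without_current(msgs, "سؤال آخر") == msgs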
frontend/build/asset-manifest.json
CHANGED
@@ -1,13 +1,13 @@
 {
   "files": {
     "main.css": "/static/css/main.3a0c885e.css",
-    "main.js": "/static/js/main.882def61.js",
+    "main.js": "/static/js/main.2713a5e5.js",
     "index.html": "/index.html",
     "main.3a0c885e.css.map": "/static/css/main.3a0c885e.css.map",
-    "main.882def61.js.map": "/static/js/main.882def61.js.map"
+    "main.2713a5e5.js.map": "/static/js/main.2713a5e5.js.map"
   },
   "entrypoints": [
     "static/css/main.3a0c885e.css",
-    "static/js/main.882def61.js"
+    "static/js/main.2713a5e5.js"
   ]
 }
frontend/build/index.html
CHANGED
@@ -1 +1 @@
-<!doctype html><html lang="en"><head><meta charset="utf-8"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Law Document RAG Chat Application"/><title>Law Document Assistant</title><script defer="defer" src="/static/js/main.882def61.js"></script><link href="/static/css/main.3a0c885e.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
+<!doctype html><html lang="en"><head><meta charset="utf-8"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Law Document RAG Chat Application"/><title>Law Document Assistant</title><script defer="defer" src="/static/js/main.2713a5e5.js"></script><link href="/static/css/main.3a0c885e.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
frontend/build/static/css/main.3a0c885e.css.map
CHANGED
Single-line minified source map regenerated with the build; full contents omitted.
frontend/build/static/js/{main.882def61.js → main.2713a5e5.js}
RENAMED
The diff for this file is too large to render. See raw diff.

frontend/build/static/js/{main.882def61.js.LICENSE.txt → main.2713a5e5.js.LICENSE.txt}
RENAMED
File without changes.

frontend/build/static/js/{main.882def61.js.map → main.2713a5e5.js.map}
RENAMED
The diff for this file is too large to render. See raw diff.
test_nebius_embeddings.py
DELETED
@@ -1,292 +0,0 @@
-#!/usr/bin/env python3
-"""
-Test script for Nebius Embeddings API via HuggingFace Router
-Tests direct API calls to verify authentication and functionality
-"""
-
-import os
-import sys
-import requests
-from pathlib import Path
-from dotenv import load_dotenv
-
-try:
-    from huggingface_hub import InferenceClient
-    HF_HUB_AVAILABLE = True
-except ImportError:
-    HF_HUB_AVAILABLE = False
-    print("WARNING: huggingface_hub not available. InferenceClient test will be skipped.")
-
-# Load .env from project root
-project_root = Path(__file__).resolve().parent
-load_dotenv(project_root / ".env")
-
-API_URL = "https://router.huggingface.co/nebius/v1/embeddings"
-MODEL = os.getenv("HF_EMBEDDING_MODEL", "Qwen/Qwen3-Embedding-8B")
-
-def get_headers():
-    """Get authorization headers"""
-    hf_token = os.getenv("HF_TOKEN")
-    if not hf_token:
-        print("ERROR: HF_TOKEN environment variable is not set!")
-        print("Please set HF_TOKEN in your .env file or environment variables.")
-        sys.exit(1)
-
-    return {
-        "Authorization": f"Bearer {hf_token}",
-        "Content-Type": "application/json"
-    }
-
-def query(payload):
-    """Make API request to Nebius embeddings endpoint"""
-    headers = get_headers()
-    try:
-        response = requests.post(API_URL, headers=headers, json=payload, timeout=60.0)
-        return response
-    except requests.exceptions.RequestException as e:
-        print(f"ERROR: Request failed: {e}")
-        return None
-
-def test_single_text():
-    """Test embedding a single text"""
-    print("\n" + "="*60)
-    print("TEST 1: Single Text Embedding")
-    print("="*60)
-
-    test_text = "ما هي المادة المتعلقة بالنفقة في نظام الأحوال الشخصية؟"
-    print(f"Input text: {test_text}")
-    print(f"Model: {MODEL}")
-
-    payload = {
-        "model": MODEL,
-        "input": test_text
-    }
-
-    response = query(payload)
-    if response is None:
-        return False
-
-    print(f"\nStatus Code: {response.status_code}")
-
-    if response.status_code == 200:
-        data = response.json()
-        print(f"Response keys: {list(data.keys())}")
-
-        if "data" in data and len(data["data"]) > 0:
-            embedding = data["data"][0]["embedding"]
-            print(f"Embedding dimensions: {len(embedding)}")
-            print(f"First 10 values: {embedding[:10]}")
-            print(f"Last 10 values: {embedding[-10:]}")
-            print("✓ Single text embedding successful!")
-            return True
-        else:
-            print(f"Unexpected response format: {data}")
-            return False
-    else:
-        print(f"ERROR: Request failed with status {response.status_code}")
-        print(f"Response: {response.text}")
-        if response.status_code == 401:
-            print("\nAuthentication failed. Please check:")
-            print("1. HF_TOKEN is correct and valid")
-            print("2. Token has proper permissions for Nebius provider")
-            print("3. Token is not expired")
-        return False
-
-def test_batch_texts():
-    """Test embedding multiple texts"""
-    print("\n" + "="*60)
-    print("TEST 2: Batch Text Embedding")
-    print("="*60)
-
-    test_texts = [
-        "ما هي المادة المتعلقة بالنفقة؟",
-        "ما هي شروط الزواج؟",
-        "كيف يتم الطلاق؟"
-    ]
-    print(f"Input texts ({len(test_texts)}):")
-    for i, text in enumerate(test_texts, 1):
-        print(f"  {i}. {text}")
-    print(f"Model: {MODEL}")
-
-    payload = {
-        "model": MODEL,
-        "input": test_texts
-    }
-
-    response = query(payload)
-    if response is None:
-        return False
-
-    print(f"\nStatus Code: {response.status_code}")
-
-    if response.status_code == 200:
-        data = response.json()
-        print(f"Response keys: {list(data.keys())}")
-
-        if "data" in data:
-            print(f"Number of embeddings returned: {len(data['data'])}")
-            for i, item in enumerate(data["data"]):
-                embedding = item["embedding"]
-                print(f"  Embedding {i+1}: {len(embedding)} dimensions")
-            print("✓ Batch text embedding successful!")
-            return True
-        else:
-            print(f"Unexpected response format: {data}")
-            return False
-    else:
-        print(f"ERROR: Request failed with status {response.status_code}")
-        print(f"Response: {response.text}")
-        return False
-
-def test_huggingface_hub_client():
-    """Test using HuggingFace Hub InferenceClient (same approach as HuggingFaceEmbeddingsWrapper)"""
-    print("\n" + "="*60)
-    print("TEST 3: HuggingFace Hub InferenceClient")
-    print("="*60)
-
-    if not HF_HUB_AVAILABLE:
-        print("SKIPPED: huggingface_hub package not installed")
-        return None
-
-    hf_token = os.getenv("HF_TOKEN")
-    if not hf_token:
-        print("ERROR: HF_TOKEN not set")
-        return False
-
-    test_text = "ما هي المادة المتعلقة بالنفقة في نظام الأحوال الشخصية؟"
-    print(f"Input text: {test_text}")
-    print(f"Model: {MODEL}")
-    print(f"Provider: nebius")
-
-    try:
-        # Initialize client (same as HuggingFaceEmbeddingsWrapper)
-        client = InferenceClient(
-            provider="nebius",
-            api_key=hf_token
-        )
-        print("✓ InferenceClient initialized successfully")
-
-        # Test feature_extraction (same as HuggingFaceEmbeddingsWrapper)
-        print("Calling client.feature_extraction()...")
-        result = client.feature_extraction(
-            test_text,
-            model=MODEL
-        )
-
-        # Check result format - InferenceClient returns numpy.ndarray
-        import numpy as np
-
-        # Convert numpy array to list if needed
-        if isinstance(result, np.ndarray):
-            # Handle 2D array (batch) or 1D array (single)
-            if result.ndim == 2:
-                # Batch result - convert to list of lists
-                result = result.tolist()
-            else:
-                # Single result - convert to list
-                result = result.tolist()
-
-        if isinstance(result, list):
-            # Handle nested list (batch) or flat list (single)
-            if len(result) > 0 and isinstance(result[0], list):
-                # Batch result
-                print(f"✓ Feature extraction successful! (batch format)")
-                print(f"Number of embeddings: {len(result)}")
-                for i, emb in enumerate(result):
-                    print(f"  Embedding {i+1}: {len(emb)} dimensions")
-            else:
-                # Single result
-                print(f"✓ Feature extraction successful!")
-                print(f"Embedding dimensions: {len(result)}")
-                print(f"First 10 values: {result[:10]}")
-                print(f"Last 10 values: {result[-10:]}")
-
-            # Test batch processing
-            print("\nTesting batch processing...")
-            test_texts = [
-                "ما هي المادة المتعلقة بالنفقة؟",
-                "ما هي شروط الزواج؟"
-            ]
-            results = []
-            for text in test_texts:
-                embedding = client.feature_extraction(text, model=MODEL)
-                # Convert numpy array to list if needed
-                if isinstance(embedding, np.ndarray):
-                    if embedding.ndim == 2:
-                        embedding = embedding.tolist()[0]  # Extract first row if 2D
-                    else:
-                        embedding = embedding.tolist()
-                results.append(embedding)
-            print(f"✓ Batch processing successful! Processed {len(results)} texts")
-            print(f"  Embedding 1: {len(results[0])} dimensions")
-            print(f"  Embedding 2: {len(results[1])} dimensions")
-
-            return True
-        else:
-            print(f"Unexpected result format: {type(result)}")
-            print(f"Result: {result}")
-            return False
-
-    except Exception as e:
-        print(f"ERROR: InferenceClient test failed")
-        print(f"Error type: {type(e).__name__}")
-        print(f"Error message: {str(e)}")
-
-        # Provide helpful error messages
-        if "401" in str(e) or "Unauthorized" in str(e):
-            print("\nAuthentication failed. Please check:")
-            print("1. HF_TOKEN is correct and valid")
-            print("2. Token has proper permissions for Nebius provider")
-            print("3. Token is not expired")
-        elif "404" in str(e) or "Not Found" in str(e):
-            print("\nModel or endpoint not found. Please check:")
-            print(f"1. Model '{MODEL}' is available on Nebius")
-            print("2. Provider 'nebius' is correctly configured")
-
-        return False
-
-def main():
-    """Run all tests"""
-    print("Nebius Embeddings API Test")
-    print("="*60)
-    print(f"API URL: {API_URL}")
-    print(f"Model: {MODEL}")
-    print(f"HF_TOKEN: {'*' * 20 if os.getenv('HF_TOKEN') else 'NOT SET'}")
-
-    # Check if token is set
-    if not os.getenv("HF_TOKEN"):
-        print("\nERROR: HF_TOKEN not found!")
-        print("Please set it in your .env file:")
-        print("  HF_TOKEN=your_token_here")
-        sys.exit(1)
-
-    # Run tests
-    results = []
-    results.append(("Single Text (Direct API)", test_single_text()))
-    results.append(("Batch Texts (Direct API)", test_batch_texts()))
-
-    # Test HuggingFace Hub InferenceClient if available
-    if HF_HUB_AVAILABLE:
-        hf_result = test_huggingface_hub_client()
-        if hf_result is not None:
-            results.append(("HuggingFace Hub InferenceClient", hf_result))
-
-    # Summary
-    print("\n" + "="*60)
-    print("TEST SUMMARY")
-    print("="*60)
-    for test_name, success in results:
-        status = "✓ PASSED" if success else "✗ FAILED"
-        print(f"{test_name}: {status}")
-
-    all_passed = all(result[1] for result in results)
-    if all_passed:
-        print("\n✓ All tests passed! API is working correctly.")
-        sys.exit(0)
-    else:
-        print("\n✗ Some tests failed. Check the errors above.")
-        sys.exit(1)
-
-if __name__ == "__main__":
-    main()
-