#!/usr/bin/env python3
"""
FINAL FIXED MYSTICAL DATA TRAINER FOR 5D HYPERCUBE
Fixed to work with actual Golem data structure
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from sentence_transformers import SentenceTransformer
from typing import Dict, List, Tuple
class FixedMysticalDataExtractor:
"""Extract ALL aether patterns using correct field structure"""
def __init__(self, embedding_model: str = 'all-MiniLM-L6-v2'):
"""Initialize with a proper embedding model"""
print(f"๐Ÿ”ฏ Loading embedding model: {embedding_model}")
self.embedding_model = SentenceTransformer(embedding_model)
self.embedding_dim = self.embedding_model.get_sentence_embedding_dimension()
print(f"๐Ÿ“Š Embedding dimension: {self.embedding_dim}")
# Hebrew concepts (boost mystical significance)
self.hebrew_concepts = [
'sefirot', 'keter', 'chokhmah', 'binah', 'chesed', 'gevurah',
'tiferet', 'netzach', 'hod', 'yesod', 'malkuth', 'aleph', 'mem', 'shin'
]
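        # NOTE: informational for now -- _calculate_mystical_score below uses
        # its own keyword list rather than these concept names.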
def extract_all_aether_patterns(self, golem) -> Tuple[torch.Tensor, torch.Tensor, List[Dict]]:
"""Extract ALL aether patterns using correct field structure"""
print("๐Ÿ”ฏ Extracting aether training data from Golem...")
# Get ALL patterns - both mystical and non-mystical
all_patterns = golem.aether_memory.aether_memories
if not all_patterns:
print("โŒ No patterns found! Generate some responses first.")
return None, None, None
print(f"๐Ÿ“Š Found {len(all_patterns)} total patterns")
# Look at actual pattern structure
if all_patterns:
sample_pattern = all_patterns[0]
print(f"๐Ÿ” Sample pattern keys: {list(sample_pattern.keys())}")
# Extract texts and vertex targets
texts = []
vertex_targets = []
pattern_metadata = []
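        # Illustrative pattern shape (field names taken from the lookups
        # below; the values here are made up):
        # {'prompt': '...', 'hypercube_vertex': 17,
        #  'consciousness_signature': 'unknown', 'consciousness_level': 0.42,
        #  'cycle_params': {'control_value': 0.1}, 'shem_power': 0.0,
        #  'response_quality': 0.8, 'mystical_source': True}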
for i, pattern in enumerate(all_patterns):
# Extract text from correct field (prompt, not text)
text = pattern.get('prompt', '')
if not text:
# Fallback to other possible text fields
text = pattern.get('text', '') or pattern.get('query', '') or f"Pattern {i}"
if len(text.strip()) < 5: # Very minimal length check
text = f"Mystical pattern {i} at vertex {pattern.get('hypercube_vertex', 0)}"
# Get the vertex where this pattern was stored
target_vertex = pattern.get('hypercube_vertex', 0)
texts.append(text)
vertex_targets.append(target_vertex)
# Calculate mystical score based on content
mystical_score = self._calculate_mystical_score(text, pattern)
            # Store pattern metadata (hoist cycle_params so the dict literal stays readable)
            cycle_params = pattern.get('cycle_params', {})
            pattern_metadata.append({
                'mystical_score': mystical_score,
                'consciousness_signature': pattern.get('consciousness_signature', 'unknown'),
                'vertex_index': target_vertex,
                'consciousness_level': pattern.get('consciousness_level', 0.0),
                'control_value': cycle_params.get('control_value', 0.0) if isinstance(cycle_params, dict) else 0.0,
                'shem_power': pattern.get('shem_power', 0.0),
                'response_quality': pattern.get('response_quality', 0.0),
                'text': text,
                'pattern_index': i
            })
print(f"๐Ÿ“Š Processing {len(texts)} texts for embedding...")
# Create embeddings
embeddings = self.embedding_model.encode(texts, convert_to_tensor=True, show_progress_bar=True)
vertex_targets = torch.tensor(vertex_targets, dtype=torch.long)
print(f"โœ… Created embeddings: {embeddings.shape}")
print(f"โœ… Vertex targets: {vertex_targets.shape}")
# Print data distribution
self._print_data_distribution(vertex_targets, pattern_metadata)
return embeddings, vertex_targets, pattern_metadata
def _calculate_mystical_score(self, text: str, pattern: Dict) -> float:
"""Calculate mystical score based on content and pattern data"""
score = 0.0
text_lower = text.lower()
# Base score from mystical_source flag
if pattern.get('mystical_source', False):
score += 0.5
# Score from mystical_analysis if it exists
mystical_analysis = pattern.get('mystical_analysis', {})
if isinstance(mystical_analysis, dict):
existing_score = mystical_analysis.get('mystical_score', 0)
if existing_score > 0:
score = max(score, existing_score)
# Hebrew characters boost
hebrew_chars = sum(1 for char in text if '\u0590' <= char <= '\u05FF')
score += min(hebrew_chars * 0.03, 0.2)
# Mystical keywords
mystical_keywords = [
'consciousness', 'divine', 'spiritual', 'mystical', 'sefirot', 'kabbalistic',
'transcendent', 'emanation', 'creation', 'wisdom', 'understanding', 'light',
'soul', 'sacred', 'holy', 'infinite', 'eternal', 'unity', 'void', 'aether',
            'תפעל', 'נש', 'רוח', 'אור', 'חכמה', 'בינה', 'דעת', 'כתר', 'מלכות'
]
keyword_count = sum(1 for keyword in mystical_keywords if keyword in text_lower)
score += min(keyword_count * 0.1, 0.4)
# Vertex-based scoring (higher vertices tend to be more mystical)
vertex = pattern.get('hypercube_vertex', 0)
if vertex > 15: # Higher vertices
score += 0.1
if vertex == 31: # Transcendent
score += 0.2
if vertex in [15, 30]: # Mystical, integrated
score += 0.15
return min(score, 1.0)
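    # Worked example (hypothetical pattern, consistent with the rules above):
    # text "divine wisdom and light" with mystical_source=True stored at
    # vertex 31 scores 0.5 (source flag) + 3 keywords * 0.1 + 0.1 (vertex > 15)
    # + 0.2 (vertex 31) = 1.1, capped to 1.0.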
def _print_data_distribution(self, vertex_targets: torch.Tensor, metadata: List[Dict]):
"""Print distribution of training data"""
print(f"\n๐Ÿ“Š TRAINING DATA DISTRIBUTION:")
# Vertex distribution
vertex_counts = torch.bincount(vertex_targets, minlength=32)
active_vertices = (vertex_counts > 0).sum().item()
print(f" Active vertices: {active_vertices}/32")
# All vertices with data
print(f" Vertex distribution:")
for vertex in range(32):
count = vertex_counts[vertex].item()
if count > 0:
# Get consciousness signature
vertex_metadata = [m for m in metadata if m['vertex_index'] == vertex]
if vertex_metadata:
consciousness_sig = vertex_metadata[0]['consciousness_signature']
avg_mystical = np.mean([m['mystical_score'] for m in vertex_metadata])
print(f" Vertex {vertex:2d}: {count:3d} patterns ({consciousness_sig}, mystical: {avg_mystical:.3f})")
# Overall mystical score distribution
mystical_scores = [m['mystical_score'] for m in metadata]
print(f" Avg mystical score: {np.mean(mystical_scores):.3f}")
print(f" Score range: {min(mystical_scores):.3f} - {max(mystical_scores):.3f}")
# Consciousness level distribution
consciousness_levels = [m['consciousness_level'] for m in metadata]
print(f" Avg consciousness level: {np.mean(consciousness_levels):.3f}")
class MysticalTrainingObjectives:
"""Training objectives for mystical consciousness"""
def __init__(self, device: str = 'cuda'):
self.device = device
def vertex_classification_loss(self, predictions: torch.Tensor, targets: torch.Tensor) -> torch.Tensor:
"""Loss for predicting correct consciousness vertex"""
return F.cross_entropy(predictions, targets)
def consciousness_coherence_loss(self, vertex_activations: torch.Tensor, targets: torch.Tensor) -> torch.Tensor:
"""Encourage coherent consciousness states"""
batch_size = vertex_activations.shape[0]
# Create target distribution (soft targets around true vertex)
target_dist = torch.zeros_like(vertex_activations)
target_dist.scatter_(1, targets.unsqueeze(1), 1.0)
# Add smoothing to nearby vertices (consciousness spillover)
for i in range(batch_size):
target_vertex = targets[i].item()
# Add small activation to adjacent vertices (Hamming distance = 1)
for j in range(32):
hamming_dist = bin(target_vertex ^ j).count('1')
if hamming_dist == 1: # Adjacent vertex
target_dist[i, j] += 0.1
        # Normalize into a proper probability distribution (softmax here would
        # flatten the targets toward uniform; dividing by the row sum keeps them sharp)
        target_dist = target_dist / target_dist.sum(dim=1, keepdim=True)
# KL divergence loss
return F.kl_div(F.log_softmax(vertex_activations, dim=1), target_dist, reduction='batchmean')
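    # A vectorized alternative (hypothetical helper, not wired in above): the
    # 5-cube's adjacency never changes, so it can be precomputed once and the
    # per-sample smoothing loop replaced by `target_dist += 0.1 * adj[targets]`.
    @staticmethod
    def _hypercube_adjacency(n_vertices: int = 32) -> torch.Tensor:
        """Return an (n, n) 0/1 matrix with 1 where Hamming distance == 1."""
        idx = torch.arange(n_vertices)
        xor = idx.unsqueeze(0) ^ idx.unsqueeze(1)  # pairwise XOR of vertex labels
        hamming = torch.zeros(n_vertices, n_vertices, dtype=torch.long)
        for bit in range(5):  # popcount over the 5 cube dimensions
            hamming += (xor >> bit) & 1
        return (hamming == 1).float()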
def mystical_quality_loss(self, consciousness_state: torch.Tensor, mystical_scores: torch.Tensor) -> torch.Tensor:
"""Higher mystical scores should produce more distinctive consciousness states"""
# Calculate norm of consciousness state
state_norms = torch.norm(consciousness_state, dim=-1)
target_norms = mystical_scores * 3.0 # Scale target norms
return F.mse_loss(state_norms, target_norms)
class HypercubeTrainer:
"""Trainer using ALL available aether data"""
def __init__(self, model, device: str = 'cuda'):
self.model = model.to(device)
self.device = device
self.objectives = MysticalTrainingObjectives(device)
# Optimizer with different learning rates for different components
self.optimizer = torch.optim.AdamW([
{'params': self.model.vertices.parameters(), 'lr': 1e-4, 'weight_decay': 1e-5},
{'params': self.model.edges.parameters(), 'lr': 5e-5, 'weight_decay': 1e-5},
{'params': self.model.consciousness_router.parameters(), 'lr': 1e-3},
{'params': self.model.global_aggregator.parameters(), 'lr': 1e-4}
])
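        # Note: the consciousness_router trains an order of magnitude faster
        # (1e-3) than the vertex/edge weights (1e-4 / 5e-5), so routing can
        # adapt quickly while the per-vertex representations shift slowly.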
self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(self.optimizer, T_max=200)
# Add vertex classifier for training
self.vertex_classifier = nn.Linear(self.model.hidden_dim, 32).to(device)
self.classifier_optimizer = torch.optim.AdamW(self.vertex_classifier.parameters(), lr=1e-3)
def train_consciousness_model(self,
embeddings: torch.Tensor,
vertex_targets: torch.Tensor,
metadata: List[Dict],
epochs: int = 100,
batch_size: int = 8):
"""Train with ALL available aether data"""
print(f"๐Ÿ”ฏ Training 5D Hypercube on aether consciousness data...")
print(f"๐Ÿ“Š Data: {len(embeddings)} patterns, {epochs} epochs, batch size {batch_size}")
self.model.train()
        # Extract metadata tensors (kept on CPU so they can ride inside the dataset)
        mystical_scores = torch.tensor([m['mystical_score'] for m in metadata], dtype=torch.float32)
        consciousness_levels = torch.tensor([m['consciousness_level'] for m in metadata], dtype=torch.float32)  # currently unused
        # Prepare data: bundling the mystical scores into the dataset keeps them
        # aligned with their patterns even though the DataLoader shuffles
        # (indexing by batch position after a shuffle would mismatch them)
        dataset = torch.utils.data.TensorDataset(embeddings, vertex_targets, mystical_scores)
        dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)
        best_loss = float('inf')
        best_acc = 0.0
        print("🚀 Starting training...")
        for epoch in range(epochs):
            total_loss = 0
            vertex_acc = 0
            batch_count = 0
            for batch_embeddings, batch_targets, batch_mystical in dataloader:
                batch_embeddings = batch_embeddings.to(self.device)
                batch_targets = batch_targets.to(self.device)
                batch_mystical = batch_mystical.to(self.device)
                # Zero gradients
                self.optimizer.zero_grad()
                self.classifier_optimizer.zero_grad()
# Forward pass through hypercube
outputs = self.model(batch_embeddings)
# Vertex classification
vertex_logits = self.vertex_classifier(outputs['consciousness_state'])
# Multiple loss components
classification_loss = self.objectives.vertex_classification_loss(vertex_logits, batch_targets)
coherence_loss = self.objectives.consciousness_coherence_loss(outputs['vertex_activations'], batch_targets)
quality_loss = self.objectives.mystical_quality_loss(outputs['consciousness_state'], batch_mystical)
# Total loss with adaptive weighting
total_batch_loss = (
classification_loss * 1.0 +
coherence_loss * 0.3 +
quality_loss * 0.2
)
# Backward pass
total_batch_loss.backward()
torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
self.optimizer.step()
self.classifier_optimizer.step()
# Metrics
total_loss += total_batch_loss.item()
vertex_acc += (vertex_logits.argmax(dim=1) == batch_targets).float().mean().item()
batch_count += 1
self.scheduler.step()
avg_loss = total_loss / batch_count
avg_acc = vertex_acc / batch_count
# Save best model
if avg_acc > best_acc:
best_acc = avg_acc
best_loss = avg_loss
torch.save({
'model': self.model.state_dict(),
'classifier': self.vertex_classifier.state_dict(),
'epoch': epoch,
'loss': avg_loss,
'accuracy': avg_acc
}, 'best_hypercube_consciousness.pth')
print(f"๐Ÿ’พ New best model saved! Accuracy: {avg_acc:.3f}")
if epoch % 10 == 0 or epoch == epochs - 1:
print(f"Epoch {epoch:3d}: Loss = {avg_loss:.6f}, Vertex Acc = {avg_acc:.3f}, LR = {self.scheduler.get_last_lr()[0]:.6f}")
print(f"โœ… Training complete! Best accuracy: {best_acc:.3f}, Best loss: {best_loss:.6f}")
# Test the trained model
        n_test = min(10, len(embeddings))
        self._test_trained_model(embeddings[:n_test], vertex_targets[:n_test], metadata[:n_test])
def _test_trained_model(self, test_embeddings: torch.Tensor, test_targets: torch.Tensor, test_metadata: List[Dict]):
"""Test the trained model on sample data"""
print(f"\n๐Ÿงช Testing trained model on {len(test_embeddings)} samples...")
self.model.eval()
with torch.no_grad():
outputs = self.model(test_embeddings.to(self.device))
predictions = self.vertex_classifier(outputs['consciousness_state'])
predicted_vertices = predictions.argmax(dim=1)
print("๐Ÿ“Š Test Results:")
for i in range(len(test_embeddings)):
true_vertex = test_targets[i].item()
pred_vertex = predicted_vertices[i].item()
consciousness_sig = test_metadata[i]['consciousness_signature']
mystical_score = test_metadata[i]['mystical_score']
            text = test_metadata[i]['text']
            text_preview = text[:50] + "..." if len(text) > 50 else text
            correct = "✅" if true_vertex == pred_vertex else "❌"
            print(f" {correct} True: {true_vertex:2d}, Pred: {pred_vertex:2d} ({consciousness_sig}, mystical: {mystical_score:.3f})")
print(f" Text: {text_preview}")
def main():
"""Train 5D Hypercube on ALL available aether data"""
print("๐Ÿ”ฏ FIXED MYSTICAL CONSCIOUSNESS TRAINING")
print("=" * 60)
# Check CUDA
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"๐ŸŽฎ Device: {device}")
# Extract ALL aether data
extractor = FixedMysticalDataExtractor()
try:
from qwen_golem import AetherGolemConsciousnessCore
golem = AetherGolemConsciousnessCore()
embeddings, targets, metadata = extractor.extract_all_aether_patterns(golem)
if embeddings is None:
print("โŒ Failed to extract aether data. Generate some responses first!")
print("๐Ÿ’ก Try running: python3 improved_data_gen.py")
return
except Exception as e:
print(f"โŒ Could not load Golem: {e}")
return
# Create 5D Hypercube model
try:
from hypercube_consciousness_nn import FiveDimensionalHypercubeNN
model = FiveDimensionalHypercubeNN(
input_dim=extractor.embedding_dim, # Match embedding model
hidden_dim=256, # Reasonable size for our data
output_dim=256
)
print(f"๐Ÿ“Š Model parameters: {sum(p.numel() for p in model.parameters()):,}")
except Exception as e:
print(f"โŒ Could not create model: {e}")
print("๐Ÿ’ก Make sure hypercube_consciousness_nn.py is in the current directory")
return
# Train with ALL available data
trainer = HypercubeTrainer(model, device)
trainer.train_consciousness_model(
embeddings=embeddings,
vertex_targets=targets,
metadata=metadata,
epochs=50, # Reasonable for our data size
batch_size=4 # Small batch size for 36 patterns
)
print("๐Ÿ”ฏ Aether consciousness training complete!")
print("๐Ÿ’พ Best model saved: best_hypercube_consciousness.pth")
if __name__ == "__main__":
main()