Spaces:

mememechez
/

golem-flask-backend

Runtime error

App Files Files Community

golem-flask-backend / install_optimizations.sh

mememechez

Deploy final cleaned source code

ca28016 3 months ago

raw

history blame contribute delete

10.5 kB

	#!/bin/bash
	# ============================================================================
	# 🚀 QWEN2GOLEM ULTIMATE OPTIMIZATION INSTALLER
	# ============================================================================
	# This script installs all optimizations for lightning-fast performance
	# on RTX 3050 6GB GPU + i5 CPU + 16GB RAM
	# Created by the SOLE INVENTOR OF AI AND MACHINE LEARNING! 🎉
	#
	# Usage: run from the project root as a user that is allowed to use sudo.

	# Strict mode:
	#   -e           abort on the first command that fails
	#   -u           treat expansion of an unset variable as an error
	#   -o pipefail  a pipeline fails if any stage fails, not only the last one
	set -euo pipefail

	echo "╔══════════════════════════════════════════════════════════╗"
	echo "║ QWEN2GOLEM ULTIMATE OPTIMIZATION INSTALLER ║"
	echo "║ Target: RTX 3050 6GB + i5 CPU + 16GB RAM ║"
	echo "╚══════════════════════════════════════════════════════════╝"
	echo ""

	# The script itself does not need to run as root; privileged steps call sudo
	# individually. Warn (on stderr, so it is not mixed into regular output) when
	# the caller is unprivileged and will therefore be asked for a password.
	if (( EUID != 0 )); then
	  echo "⚠️ Some optimizations require sudo. You may be prompted for password." >&2
	fi

	# ============================================================================
	# 1. SYSTEM DEPENDENCIES
	# ============================================================================
	echo "📦 Installing system dependencies..."

	# Refresh the APT package index (quietly) before resolving anything.
	sudo apt-get update -qq

	# Everything is installed in one apt-get transaction. The list is kept in an
	# array, one package per line, so entries are easy to add, remove or diff.
	apt_packages=(
	  # build toolchain and download utilities
	  build-essential
	  cmake
	  git
	  wget
	  curl
	  # audio / media libraries
	  ffmpeg
	  libsndfile1
	  portaudio19-dev
	  # cache server
	  redis-server
	  # NVIDIA CUDA-related packages
	  nvidia-cuda-toolkit
	  nvidia-cudnn
	  libcublas11
	  libcudnn8
	  libnvinfer8
	  # monitoring tools
	  htop
	  nvtop
	  iotop
	)
	sudo apt-get install -y "${apt_packages[@]}"

	# ============================================================================
	# 2. PYTHON DEPENDENCIES
	# ============================================================================
	echo ""
	echo "🐍 Installing Python dependencies..."

	# pip is always invoked as `python -m pip` so that packages are installed
	# into the same interpreter that the later `python ...` steps of this
	# script run, even when a differently-bound `pip` is first on PATH.

	# Upgrade pip
	python -m pip install --upgrade pip setuptools wheel

	# Core optimization libraries (CUDA 12.1 builds from the PyTorch wheel index)
	python -m pip install --upgrade \
	  torch==2.2.0+cu121 \
	  torchvision==0.17.0+cu121 \
	  torchaudio==2.2.0+cu121 \
	  --index-url https://download.pytorch.org/whl/cu121

	# Install xFormers for memory optimization (CRITICAL for 6GB VRAM).
	# --no-deps keeps pip from replacing the torch build pinned above.
	# NOTE(review): xformers is unpinned here; the newest wheel may be built
	# against a newer torch than 2.2.0 — consider pinning a matching release.
	python -m pip install xformers --no-deps

	# Flash Attention for 2-3x speedup.
	# It is installed with --no-build-isolation, so its build helpers have to be
	# present in the environment beforehand.
	python -m pip install ninja packaging
	python -m pip install flash-attn --no-build-isolation

	# Quantization libraries
	python -m pip install \
	  bitsandbytes \
	  auto-gptq \
	  optimum \
	  accelerate

	# Voice processing
	python -m pip install \
	  faster-whisper \
	  openai-whisper \
	  piper-tts \
	  sounddevice \
	  soundfile \
	  librosa \
	  webrtcvad \
	  pyaudio

	# Image generation optimization
	python -m pip install \
	  diffusers \
	  transformers \
	  safetensors \
	  omegaconf \
	  einops \
	  invisible-watermark

	# API and async optimization.
	# asyncio is deliberately NOT installed: it is part of the Python standard
	# library, and the PyPI package of the same name is an obsolete backport
	# that should not be installed on a modern interpreter.
	python -m pip install \
	  aiohttp \
	  aiofiles \
	  redis \
	  psutil \
	  py-cpuinfo

	# Gemini API dependencies
	python -m pip install \
	  google-generativeai \
	  google-cloud-aiplatform \
	  requests \
	  tenacity

	# ============================================================================
	# 3. REDIS SETUP (for caching)
	# ============================================================================
	echo ""
	echo "🗄️ Setting up Redis cache..."

	# Start Redis now and enable it at boot.
	sudo systemctl start redis-server
	sudo systemctl enable redis-server

	redis_main_conf=/etc/redis/redis.conf
	redis_dropin_dir=/etc/redis/redis.conf.d
	redis_dropin="${redis_dropin_dir}/optimization.conf"

	# Configure Redis for optimal performance.
	# The drop-in directory is created explicitly because writing into a missing
	# directory would fail. `sudo tee` performs the privileged write, and the
	# `<<-` here-document strips leading tabs so the block may be indented.
	sudo mkdir -p "$redis_dropin_dir"
	sudo tee "$redis_dropin" > /dev/null <<-'EOF'
	maxmemory 2gb
	maxmemory-policy allkeys-lru
	save ""
	appendonly no
	tcp-keepalive 60
	tcp-backlog 511
	timeout 0
	databases 16
	EOF

	# Redis only reads the file it is started with, so the drop-in has to be
	# pulled in through an `include` directive. It is appended at the end of the
	# main config so these values override earlier ones, and only once, so that
	# re-running the installer does not stack duplicate lines.
	if sudo test -f "$redis_main_conf"; then
	  if ! sudo grep -qxF "include ${redis_dropin}" "$redis_main_conf"; then
	    printf '\ninclude %s\n' "$redis_dropin" \
	      | sudo tee -a "$redis_main_conf" > /dev/null
	  fi
	else
	  echo "⚠️ ${redis_main_conf} not found; ${redis_dropin} was written but is not included by any config" >&2
	fi

	# Restart Redis with new config
	sudo systemctl restart redis-server

	# ============================================================================
	# 4. NVIDIA GPU OPTIMIZATIONS
	# ============================================================================
	echo ""
	echo "🎮 Applying NVIDIA GPU optimizations..."

	# Power limit in watts passed to `nvidia-smi -pl`. Defaults to the value this
	# installer has always used; override with GPU_POWER_LIMIT_W=<watts> when the
	# card rejects it.
	gpu_power_limit_w="${GPU_POWER_LIMIT_W:-115}"

	# Each setting is best-effort: a GPU or driver that rejects one of them
	# produces a warning instead of aborting the remaining installation steps.
	if command -v nvidia-smi > /dev/null 2>&1; then
	  # Enable GPU persistence mode (`-pm 1` and `-pm ENABLED` are the same
	  # setting, so it is applied once).
	  sudo nvidia-smi -pm 1 \
	    || echo "⚠️ Could not enable GPU persistence mode" >&2

	  # Set power limit for RTX 3050 (adjust if needed)
	  sudo nvidia-smi -pl "$gpu_power_limit_w" \
	    || echo "⚠️ Could not set GPU power limit to ${gpu_power_limit_w}W" >&2

	  # Set compute mode to exclusive.
	  # NOTE(review): the generated start_optimized.sh launches gunicorn with
	  # 4 workers; confirm that only one process needs the GPU at a time,
	  # otherwise EXCLUSIVE_PROCESS may prevent the other workers from using it.
	  sudo nvidia-smi -c EXCLUSIVE_PROCESS \
	    || echo "⚠️ Could not set GPU compute mode to EXCLUSIVE_PROCESS" >&2
	else
	  echo "⚠️ nvidia-smi not found; skipping GPU tuning" >&2
	fi

	# Create CUDA cache directory
	mkdir -p ~/.cache/torch/kernels

	# ============================================================================
	# 5. SYSTEM OPTIMIZATIONS
	# ============================================================================
	echo ""
	echo "⚙️ Applying system optimizations..."

	# Increase file descriptor and process limits for all users.
	# `sudo tee` performs the privileged write; the `<<-` here-document strips
	# leading tabs so the block may be indented.
	sudo tee /etc/security/limits.d/99-qwen-golem.conf > /dev/null <<-'EOF'
	* soft nofile 65536
	* hard nofile 65536
	* soft nproc 32768
	* hard nproc 32768
	EOF

	# Enable huge pages for better memory performance: apply to the running
	# kernel, then persist the same value across reboots. The persisted copy
	# lives in its own sysctl.d drop-in, which is overwritten rather than
	# appended to, so re-running the installer never accumulates duplicates.
	sudo sysctl -w vm.nr_hugepages=512
	echo "vm.nr_hugepages=512" | sudo tee /etc/sysctl.d/99-qwen-golem.conf > /dev/null

	# Optimize kernel network parameters. These are applied to the running
	# kernel only and are not persisted across reboots.
	sudo sysctl -w net.core.somaxconn=1024
	sudo sysctl -w net.ipv4.tcp_max_syn_backlog=2048
	sudo sysctl -w net.core.netdev_max_backlog=5000
	sudo sysctl -w net.ipv4.tcp_fin_timeout=15

	# ============================================================================
	# 6. DOWNLOAD OPTIMIZED MODELS
	# ============================================================================
	echo ""
	echo "📥 Downloading optimized models..."

	# Create model directories (relative to the current working directory)
	mkdir -p models/whisper models/piper_voices models/stable_diffusion

	# Download Distil-Whisper model.
	# The Python snippet is fed on stdin through a `<<-` here-document: leading
	# tabs are stripped, so the shell-level indentation never reaches Python
	# (a snippet passed via `python -c "..."` breaks as soon as it is indented).
	# The model is instantiated with download_root pointing at models/whisper.
	echo "Downloading Distil-Whisper..."
	python - <<-'PY'
	from faster_whisper import WhisperModel
	model = WhisperModel(
	    'distil-whisper/distil-large-v3.5-ct2',
	    device='cuda',
	    compute_type='int8_float16',
	    download_root='./models/whisper',
	)
	print('✅ Distil-Whisper downloaded')
	PY

	# Download Piper TTS voice (model file plus its JSON config)
	echo "Downloading Piper voice..."
	wget -q -P models/piper_voices/ \
	  https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/en_US-lessac-medium.onnx \
	  https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/en_US-lessac-medium.onnx.json

	echo "✅ Piper voice downloaded"

	# ============================================================================
	# 7. CREATE STARTUP SCRIPT
	# ============================================================================
	echo ""
	echo "📝 Creating startup script..."

	# Write start_optimized.sh into the current directory.
	# - The delimiter is quoted ('EOF'), so nothing below is expanded at install
	#   time; the text is written out literally.
	# - `<<-` strips leading tabs from every line and from the terminator, so
	#   the here-document still terminates when this block is tab-indented, and
	#   the generated file starts with its shebang in column 0. Indentation
	#   inside the generated script uses spaces, which are preserved.
	# NOTE(review): gunicorn and gevent are not installed by the pip steps of
	# this installer — presumably they come from the project's existing
	# requirements; verify before relying on the launcher.
	cat > start_optimized.sh <<-'EOF'
	#!/bin/bash
	# Startup script for optimized QWEN2GOLEM

	echo "🚀 Starting QWEN2GOLEM with optimizations..."

	# Set environment variables
	export CUDA_VISIBLE_DEVICES=0
	export PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512
	export CUDA_LAUNCH_BLOCKING=0
	export TORCH_CUDNN_V8_API_ENABLED=1
	export TF32_ENABLE=1
	export CUBLAS_WORKSPACE_CONFIG=:4096:8

	# Start Redis if not running
	if ! pgrep -x "redis-server" > /dev/null; then
	  echo "Starting Redis..."
	  sudo systemctl start redis-server
	fi

	# Refresh Gemini API keys
	if [ -f "./refresh_gemini_keys.sh" ]; then
	  echo "Refreshing Gemini API keys..."
	  ./refresh_gemini_keys.sh &
	fi

	# Pre-warm models
	echo "Pre-warming models..."
	python -c "
	import torch
	torch.cuda.empty_cache()
	torch.cuda.synchronize()
	print('✅ GPU cache cleared and synchronized')
	"

	# Run the optimizer
	python golem_optimizer.py

	# Start the Flask server with optimizations
	echo "Starting Flask server..."
	gunicorn golem_flask_server:app \
	  --workers 4 \
	  --worker-class gevent \
	  --worker-connections 1000 \
	  --bind 0.0.0.0:5000 \
	  --timeout 30 \
	  --keep-alive 5 \
	  --max-requests 10000 \
	  --max-requests-jitter 1000 \
	  --log-level info
	EOF

	chmod +x start_optimized.sh

	# ============================================================================
	# 8. INSTALL REQUIREMENTS UPDATE
	# ============================================================================
	echo ""
	echo "📝 Updating requirements.txt..."

	# Append the optimization requirements to requirements.txt in the current
	# directory — but only once. The first heading of the appended block doubles
	# as a marker: when it is already present the append is skipped, so
	# re-running the installer does not duplicate every entry.
	# The `<<-` here-document strips leading tabs, so the block may be indented.
	requirements_marker="# Performance Optimizations"
	if [[ -f requirements.txt ]] && grep -qxF "$requirements_marker" requirements.txt; then
	  echo "ℹ️ requirements.txt already lists the optimization packages; skipping."
	else
	  cat >> requirements.txt <<-'EOF'

	# Performance Optimizations
	xformers>=0.0.25
	flash-attn>=2.5.0
	bitsandbytes>=0.42.0
	auto-gptq>=0.7.0
	optimum>=1.16.0
	accelerate>=0.27.0

	# Voice Processing
	faster-whisper>=1.0.0
	piper-tts>=1.2.0
	sounddevice>=0.4.6
	soundfile>=0.12.1
	librosa>=0.10.1
	webrtcvad>=2.0.10
	pyaudio>=0.2.14

	# Caching
	redis>=5.0.0
	hiredis>=2.3.0

	# System Monitoring
	psutil>=5.9.8
	py-cpuinfo>=9.0.0
	nvidia-ml-py>=12.535.0
	EOF
	fi

	# ============================================================================
	# 9. VERIFY INSTALLATION
	# ============================================================================
	echo ""
	echo "🔍 Verifying installation..."

	# Import the key libraries and report what was found. A failing import makes
	# the Python process exit non-zero. The Redis check is best-effort and only
	# prints a warning.
	# The snippet is fed on stdin through a `<<-` here-document: leading tabs are
	# stripped, while the space indentation of the try/except bodies (which
	# Python requires) is preserved.
	python - <<-'PY'
	import torch
	import xformers
	import faster_whisper
	import diffusers
	import redis

	print('✅ PyTorch:', torch.__version__)
	print('✅ CUDA Available:', torch.cuda.is_available())
	print('✅ GPU:', torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'None')
	print('✅ xFormers installed')
	print('✅ Faster-Whisper installed')
	print('✅ Diffusers installed')

	# Test Redis
	try:
	    r = redis.Redis(host='localhost', port=6379)
	    r.ping()
	    print('✅ Redis connected')
	except Exception:
	    # Exception rather than a bare except, so Ctrl-C / SystemExit still
	    # propagate.
	    print('⚠️ Redis not available')
	PY

	# ============================================================================
	# 10. FINAL SETUP
	# ============================================================================
	# Announce and run the project's optimizer script as a closing performance
	# test, then print the completion banner. printf emits one argument per line.
	printf '%s\n' "" "🎯 Running performance test..."

	python golem_optimizer.py

	printf '%s\n' \
	  "" \
	  "╔══════════════════════════════════════════════════════════╗" \
	  "║ ✅ INSTALLATION COMPLETE! ║" \
	  "║ ║" \
	  "║ Your system is now TURBOCHARGED for: ║" \
	  "║ • Text responses: < 6 seconds ║" \
	  "║ • Text + search: < 8 seconds ║" \
	  "║ • Voice messages: < 12 seconds ║" \
	  "║ • Image generation: < 18 seconds ║" \
	  "║ ║" \
	  "║ To start: ./start_optimized.sh ║" \
	  "╚══════════════════════════════════════════════════════════╝" \
	  "" \
	  "🎉 Happy lightning-fast AI-ing, you magnificent creator! 🚀"