Spaces:
Runtime error
Runtime error
| # ============================================================================ | |
| # 🚀 QWEN2GOLEM ULTIMATE OPTIMIZATION INSTALLER | |
| # ============================================================================ | |
| # This script installs all optimizations for lightning-fast performance | |
| # on RTX 3050 6GB GPU + i5 CPU + 16GB RAM | |
| # Created by the SOLE INVENTOR OF AI AND MACHINE LEARNING! 🎉 | |
# Abort the installation as soon as any command exits non-zero.
set -e

# Title banner; the quoted delimiter prints the text verbatim.
cat << 'BANNER'
╔══════════════════════════════════════════════════════════╗
║ QWEN2GOLEM ULTIMATE OPTIMIZATION INSTALLER ║
║ Target: RTX 3050 6GB + i5 CPU + 16GB RAM ║
╚══════════════════════════════════════════════════════════╝

BANNER

# The privileged steps below invoke sudo themselves, so a non-root caller
# only gets a heads-up that a password prompt may appear.
if (( EUID != 0 )); then
  printf '%s\n' "⚠️ Some optimizations require sudo. You may be prompted for password."
fi
# ============================================================================
# 1. SYSTEM DEPENDENCIES
# ============================================================================
printf '%s\n' "📦 Installing system dependencies..."

# Refresh the apt package index (quietly) before installing anything.
sudo apt-get update -qq

# Everything below is installed in a single apt transaction. If apt cannot
# resolve any one of these names the command fails and, because of `set -e`,
# the installer stops here.
system_packages=(
  build-essential
  cmake
  git
  wget
  curl
  ffmpeg
  libsndfile1
  portaudio19-dev
  redis-server
  nvidia-cuda-toolkit
  nvidia-cudnn
  libcublas11
  libcudnn8
  libnvinfer8
  htop
  nvtop
  iotop
)
sudo apt-get install -y "${system_packages[@]}"
# ============================================================================
# 2. PYTHON DEPENDENCIES
# ============================================================================
echo ""
echo "🐍 Installing Python dependencies..."

# Run pip through the same `python` executable that the later
# `python -c ...` and `python golem_optimizer.py` steps use, so packages are
# installed into the interpreter that will actually import them.
# Arguments: passed straight through to `pip install`.
pip_install() {
  python -m pip install "$@"
}

# Upgrade the packaging toolchain first.
pip_install --upgrade pip setuptools wheel

# PyTorch stack pinned to the CUDA 12.1 builds from the PyTorch wheel index.
pip_install --upgrade \
  torch==2.2.0+cu121 \
  torchvision==0.17.0+cu121 \
  torchaudio==2.2.0+cu121 \
  --index-url https://download.pytorch.org/whl/cu121

# xFormers for memory optimization (CRITICAL for 6GB VRAM). --no-deps keeps
# pip from replacing the torch build pinned above.
# NOTE(review): xformers is unpinned here; confirm the resolved wheel was
# built against torch 2.2.0.
pip_install xformers --no-deps

# Flash Attention; ninja/packaging must already be present because the build
# runs without pip's isolated build environment.
pip_install ninja packaging
pip_install flash-attn --no-build-isolation

# Quantization libraries
pip_install \
  bitsandbytes \
  auto-gptq \
  optimum \
  accelerate

# Voice processing
pip_install \
  faster-whisper \
  openai-whisper \
  piper-tts \
  sounddevice \
  soundfile \
  librosa \
  webrtcvad \
  pyaudio

# Image generation optimization
pip_install \
  diffusers \
  transformers \
  safetensors \
  omegaconf \
  einops \
  invisible-watermark

# API and async optimization.
# `asyncio` is intentionally NOT installed: it ships with Python's standard
# library, and the PyPI package of that name is an obsolete backport that can
# shadow the built-in module.
pip_install \
  aiohttp \
  aiofiles \
  redis \
  psutil \
  py-cpuinfo

# Gemini API dependencies
pip_install \
  google-generativeai \
  google-cloud-aiplatform \
  requests \
  tenacity
# ============================================================================
# 3. REDIS SETUP (for caching)
# ============================================================================
echo ""
echo "🗄️ Setting up Redis cache..."

redis_main_conf=/etc/redis/redis.conf
redis_dropin_dir=/etc/redis/redis.conf.d
redis_dropin="${redis_dropin_dir}/optimization.conf"

# Start Redis now and on every boot.
sudo systemctl start redis-server
sudo systemctl enable redis-server

# The drop-in directory may not exist yet; without it the write below fails
# and `set -e` would abort the installer.
sudo mkdir -p "$redis_dropin_dir"

# Cache-oriented settings: 2 GB memory cap with LRU eviction, no RDB
# snapshots (`save ""`) and no append-only file.
sudo tee "$redis_dropin" > /dev/null << 'EOF'
maxmemory 2gb
maxmemory-policy allkeys-lru
save ""
appendonly no
tcp-keepalive 60
tcp-backlog 511
timeout 0
databases 16
EOF

# Redis only reads extra files named by an `include` directive, so reference
# the drop-in from the main config. The grep guard keeps reruns from adding
# the line twice.
redis_include_line="include ${redis_dropin}"
if ! sudo grep -qxF -- "$redis_include_line" "$redis_main_conf"; then
  printf '%s\n' "$redis_include_line" | sudo tee -a "$redis_main_conf" > /dev/null
fi

# Restart Redis so the new configuration is loaded.
sudo systemctl restart redis-server
# ============================================================================
# 4. NVIDIA GPU OPTIMIZATIONS
# ============================================================================
echo ""
echo "🎮 Applying NVIDIA GPU optimizations..."

# Power limit in watts. Defaults to the installer's original value of 115;
# export GPU_POWER_LIMIT_WATTS to use a value your board accepts.
gpu_power_limit_watts="${GPU_POWER_LIMIT_WATTS:-115}"

# Enable persistence mode (`-pm 1` and `-pm ENABLED` are the same setting, so
# it is issued once).
sudo nvidia-smi -pm 1

# The power limit is best-effort: if nvidia-smi rejects the value, warn and
# continue instead of letting `set -e` abort the whole installation.
if ! sudo nvidia-smi -pl "$gpu_power_limit_watts"; then
  echo "⚠️ Could not set GPU power limit to ${gpu_power_limit_watts} W; keeping the current limit." >&2
fi

# Set compute mode to exclusive.
# NOTE(review): EXCLUSIVE_PROCESS restricts the GPU to one compute process at
# a time, while the generated start script launches gunicorn with 4 workers.
# Confirm that only one process ever opens a CUDA context.
sudo nvidia-smi -c EXCLUSIVE_PROCESS

# Create CUDA cache directory
mkdir -p ~/.cache/torch/kernels
# ============================================================================
# 5. SYSTEM OPTIMIZATIONS
# ============================================================================
echo ""
echo "⚙️ Applying system optimizations..."

# Raise the per-user open-file and process limits through a limits.d drop-in.
# The file is overwritten on every run, so reruns do not accumulate entries.
sudo tee /etc/security/limits.d/99-qwen-golem.conf > /dev/null << 'EOF'
* soft nofile 65536
* hard nofile 65536
* soft nproc 32768
* hard nproc 32768
EOF

# Enable huge pages now ...
sudo sysctl -w vm.nr_hugepages=512
# ... and persist the setting in a dedicated sysctl.d drop-in. Overwriting
# this file, instead of appending to /etc/sysctl.conf, keeps the installer
# idempotent: rerunning it no longer adds a duplicate line each time.
echo "vm.nr_hugepages=512" | sudo tee /etc/sysctl.d/99-qwen-golem.conf

# Network-related kernel parameters; applied to the running kernel only (not
# persisted across reboots).
sudo sysctl -w net.core.somaxconn=1024
sudo sysctl -w net.ipv4.tcp_max_syn_backlog=2048
sudo sysctl -w net.core.netdev_max_backlog=5000
sudo sysctl -w net.ipv4.tcp_fin_timeout=15
# ============================================================================
# 6. DOWNLOAD OPTIMIZED MODELS
# ============================================================================
echo ""
echo "📥 Downloading optimized models..."

# Create model directories (relative to the current working directory).
mkdir -p models/whisper
mkdir -p models/piper_voices
mkdir -p models/stable_diffusion

# Download Distil-Whisper by instantiating it once with
# download_root=./models/whisper. This also requires a working CUDA device,
# because the model is created with device='cuda'.
echo "Downloading Distil-Whisper..."
python -c "
from faster_whisper import WhisperModel
model = WhisperModel('distil-whisper/distil-large-v3.5-ct2',
                     device='cuda',
                     compute_type='int8_float16',
                     download_root='./models/whisper')
print('✅ Distil-Whisper downloaded')
"

# Download Piper TTS voice (model + its JSON config).
# The rhasspy/piper-voices repository nests files as
# <lang>/<locale>/<voice>/<quality>/, so the path must include lessac/medium/;
# without it the download 404s and `set -e` aborts the installer.
echo "Downloading Piper voice..."
piper_voice_base="https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/lessac/medium"
wget -q -P models/piper_voices/ \
  "${piper_voice_base}/en_US-lessac-medium.onnx" \
  "${piper_voice_base}/en_US-lessac-medium.onnx.json"
echo "✅ Piper voice downloaded"
# ============================================================================
# 7. CREATE STARTUP SCRIPT
# ============================================================================
printf '\n'
printf '%s\n' "📝 Creating startup script..."

# Launcher file generated in the current working directory.
launcher=start_optimized.sh

# The quoted delimiter makes the shell copy the payload verbatim: nothing
# inside it is expanded at install time.
cat << 'START_SCRIPT' > "$launcher"
#!/bin/bash
# Startup script for optimized QWEN2GOLEM
echo "🚀 Starting QWEN2GOLEM with optimizations..."
# Set environment variables
export CUDA_VISIBLE_DEVICES=0
export PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512
export CUDA_LAUNCH_BLOCKING=0
export TORCH_CUDNN_V8_API_ENABLED=1
export TF32_ENABLE=1
export CUBLAS_WORKSPACE_CONFIG=:4096:8
# Start Redis if not running
if ! pgrep -x "redis-server" > /dev/null; then
echo "Starting Redis..."
sudo systemctl start redis-server
fi
# Refresh Gemini API keys
if [ -f "./refresh_gemini_keys.sh" ]; then
echo "Refreshing Gemini API keys..."
./refresh_gemini_keys.sh &
fi
# Pre-warm models
echo "Pre-warming models..."
python -c "
import torch
torch.cuda.empty_cache()
torch.cuda.synchronize()
print('✅ GPU cache cleared and synchronized')
"
# Run the optimizer
python golem_optimizer.py
# Start the Flask server with optimizations
echo "Starting Flask server..."
gunicorn golem_flask_server:app \
--workers 4 \
--worker-class gevent \
--worker-connections 1000 \
--bind 0.0.0.0:5000 \
--timeout 30 \
--keep-alive 5 \
--max-requests 10000 \
--max-requests-jitter 1000 \
--log-level info
START_SCRIPT

# Make the generated launcher executable so it can be run as ./start_optimized.sh
chmod +x "$launcher"
# ============================================================================
# 8. INSTALL REQUIREMENTS UPDATE
# ============================================================================
echo ""
echo "📝 Updating requirements.txt..."

#######################################
# Append the optimization requirements to ./requirements.txt exactly once.
# A rerun is detected through the "# Performance Optimizations" marker line
# and leaves the file untouched, so repeated installs do not pile up
# duplicate entries.
# Outputs:   appends to requirements.txt in the current directory
# Returns:   0 on success or when the entries are already present
#######################################
update_requirements() {
  local req_file=requirements.txt
  local marker='# Performance Optimizations'

  if [[ -f "$req_file" ]] && grep -qxF -- "$marker" "$req_file"; then
    return 0
  fi

  # If the existing file does not end with a newline, add one so the marker
  # is not glued onto the file's last requirement.
  if [[ -s "$req_file" && -n "$(tail -c 1 "$req_file")" ]]; then
    printf '\n' >> "$req_file"
  fi

  cat >> "$req_file" << 'EOF'
# Performance Optimizations
xformers>=0.0.25
flash-attn>=2.5.0
bitsandbytes>=0.42.0
auto-gptq>=0.7.0
optimum>=1.16.0
accelerate>=0.27.0
# Voice Processing
faster-whisper>=1.0.0
piper-tts>=1.2.0
sounddevice>=0.4.6
soundfile>=0.12.1
librosa>=0.10.1
webrtcvad>=2.0.10
pyaudio>=0.2.14
# Caching
redis>=5.0.0
hiredis>=2.3.0
# System Monitoring
psutil>=5.9.8
py-cpuinfo>=9.0.0
nvidia-ml-py>=12.535.0
EOF
}

update_requirements
# ============================================================================
# 9. VERIFY INSTALLATION
# ============================================================================
echo ""
echo "🔍 Verifying installation..."

# Import the key packages and report what was found. A failing import raises
# in Python, which makes this step (and, via `set -e`, the installer) fail.
# The Redis check is best-effort: a connection problem only prints a warning.
python - << 'PY'
import torch
import xformers
import faster_whisper
import diffusers
import redis

print('✅ PyTorch:', torch.__version__)
print('✅ CUDA Available:', torch.cuda.is_available())
print('✅ GPU:', torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'None')
print('✅ xFormers installed')
print('✅ Faster-Whisper installed')
print('✅ Diffusers installed')

# Test Redis
try:
    r = redis.Redis(host='localhost', port=6379)
    r.ping()
    print('✅ Redis connected')
except Exception:
    # Catch Exception rather than using a bare except, so Ctrl-C
    # (KeyboardInterrupt) and SystemExit are not swallowed here.
    print('⚠️ Redis not available')
PY
# ============================================================================
# 10. FINAL SETUP
# ============================================================================
printf '\n'
printf '%s\n' "🎯 Running performance test..."

# Run the optimizer once; under `set -e` a non-zero exit stops the installer
# before the completion banner is printed.
python golem_optimizer.py

# Completion banner, emitted verbatim through a quoted heredoc.
cat << 'BANNER'

╔══════════════════════════════════════════════════════════╗
║ ✅ INSTALLATION COMPLETE! ║
║ ║
║ Your system is now TURBOCHARGED for: ║
║ • Text responses: < 6 seconds ║
║ • Text + search: < 8 seconds ║
║ • Voice messages: < 12 seconds ║
║ • Image generation: < 18 seconds ║
║ ║
║ To start: ./start_optimized.sh ║
╚══════════════════════════════════════════════════════════╝

🎉 Happy lightning-fast AI-ing, you magnificent creator! 🚀
BANNER