Spaces:

mememechez
/

golem-flask-backend

Runtime error

File size: 10,468 Bytes

ca28016

#!/bin/bash
# ============================================================================
# 🚀 QWEN2GOLEM ULTIMATE OPTIMIZATION INSTALLER
# ============================================================================
# Installs the system packages, Python libraries, service configuration and
# model files used by the optimized QWEN2GOLEM setup.
# Target machine: RTX 3050 6GB GPU + i5 CPU + 16GB RAM.
#
# Privileged steps are executed through sudo.

# Stop at the first command that fails.
set -o errexit

# Opening banner. The quoted delimiter prints the text verbatim; the empty
# last line reproduces the blank line that follows the box.
cat << 'BANNER'
╔══════════════════════════════════════════════════════════╗
║   QWEN2GOLEM ULTIMATE OPTIMIZATION INSTALLER             ║
║   Target: RTX 3050 6GB + i5 CPU + 16GB RAM              ║
╚══════════════════════════════════════════════════════════╝

BANNER

# Later steps call sudo; tell non-root users up front that a password prompt
# may appear.
if (( EUID != 0 )); then
    echo "⚠️  Some optimizations require sudo. You may be prompted for password."
fi

# ============================================================================
# 1. SYSTEM DEPENDENCIES
# ============================================================================
echo "📦 Installing system dependencies..."

# Refresh package lists quietly.
sudo apt-get update -qq

# Build tools, audio libraries, Redis, the CUDA toolkit and monitoring tools.
# A failure here is fatal: later sections rely on these packages.
core_packages=(
    build-essential
    cmake
    git
    wget
    curl
    ffmpeg
    libsndfile1
    portaudio19-dev
    redis-server
    nvidia-cuda-toolkit
    htop
    nvtop
    iotop
)
sudo apt-get install -y "${core_packages[@]}"

# NVIDIA runtime libraries are installed one at a time and on a best-effort
# basis: apt refuses the whole transaction when a single package name is
# unknown, which previously aborted the installer before anything else ran.
# NOTE(review): libcudnn8 / libnvinfer8 are normally published only in
# NVIDIA's own apt repository — confirm it is configured on target hosts.
gpu_packages=(
    nvidia-cudnn
    libcublas11
    libcudnn8
    libnvinfer8
)
for pkg in "${gpu_packages[@]}"; do
    if ! sudo apt-get install -y "$pkg"; then
        echo "⚠️  Could not install ${pkg}; continuing without it." >&2
    fi
done

# ============================================================================
# 2. PYTHON DEPENDENCIES
# ============================================================================
echo ""
echo "🐍 Installing Python dependencies..."

# Install through "python -m pip" so packages land in the same interpreter
# that this script later runs with "python -c" and "python golem_optimizer.py".
# A bare "pip" on PATH may belong to a different Python installation.
# Arguments: passed straight through to "pip install".
pip_install() {
    python -m pip install "$@"
}

# Upgrade the packaging toolchain first.
pip_install --upgrade pip setuptools wheel

# PyTorch stack, pinned to the CUDA 12.1 wheels from the PyTorch index.
pip_install --upgrade \
    torch==2.2.0+cu121 \
    torchvision==0.17.0+cu121 \
    torchaudio==2.2.0+cu121 \
    --index-url https://download.pytorch.org/whl/cu121

# xFormers for memory optimization (CRITICAL for 6GB VRAM).
# --no-deps keeps pip from replacing the pinned torch build above.
# NOTE(review): xformers is unpinned; confirm the resolved release matches
# torch 2.2.0.
pip_install xformers --no-deps

# Flash Attention; ninja/packaging must already be present because the build
# runs without isolation.
pip_install ninja packaging
pip_install flash-attn --no-build-isolation

# Quantization libraries
pip_install \
    bitsandbytes \
    auto-gptq \
    optimum \
    accelerate

# Voice processing
pip_install \
    faster-whisper \
    openai-whisper \
    piper-tts \
    sounddevice \
    soundfile \
    librosa \
    webrtcvad \
    pyaudio

# Image generation optimization
pip_install \
    diffusers \
    transformers \
    safetensors \
    omegaconf \
    einops \
    invisible-watermark

# API and async optimization.
# "asyncio" is intentionally NOT installed: it is part of the Python standard
# library, and the PyPI package of that name is an obsolete backport that can
# shadow the built-in module.
pip_install \
    aiohttp \
    aiofiles \
    redis \
    psutil \
    py-cpuinfo

# Gemini API dependencies
pip_install \
    google-generativeai \
    google-cloud-aiplatform \
    requests \
    tenacity

# ============================================================================
# 3. REDIS SETUP (for caching)
# ============================================================================
echo ""
echo "🗄️ Setting up Redis cache..."

redis_main_conf=/etc/redis/redis.conf
redis_dropin_dir=/etc/redis/redis.conf.d
redis_dropin_conf="${redis_dropin_dir}/optimization.conf"

# Start Redis now and on every boot.
sudo systemctl start redis-server
sudo systemctl enable redis-server

# The drop-in directory is not guaranteed to exist; create it so the write
# below cannot fail on a missing path.
sudo mkdir -p "$redis_dropin_dir"

# Cache-oriented settings: bounded memory with LRU eviction and persistence
# (RDB snapshots and AOF) switched off.
sudo tee "$redis_dropin_conf" > /dev/null << 'EOF'
maxmemory 2gb
maxmemory-policy allkeys-lru
save ""
appendonly no
tcp-keepalive 60
tcp-backlog 511
timeout 0
databases 16
EOF

# Redis only loads files named in its main configuration, so reference the
# drop-in explicitly. The line goes at the end so its values override earlier
# ones, and the grep guard keeps re-runs from adding it twice.
include_line="include ${redis_dropin_conf}"
if ! sudo grep -qxF -- "$include_line" "$redis_main_conf"; then
    echo "$include_line" | sudo tee -a "$redis_main_conf" > /dev/null
fi

# Restart Redis with new config
sudo systemctl restart redis-server

# ============================================================================
# 4. NVIDIA GPU OPTIMIZATIONS
# ============================================================================
echo ""
echo "🎮 Applying NVIDIA GPU optimizations..."

# Run one nvidia-smi tuning command through sudo.
# Arguments: the nvidia-smi options to apply.
# Returns:   always 0 — a rejected setting only produces a warning on stderr,
#            so an unsupported tweak cannot abort the whole installer.
gpu_tune() {
    if ! sudo nvidia-smi "$@"; then
        echo "⚠️  nvidia-smi $* failed; skipping this GPU tweak." >&2
    fi
}

if command -v nvidia-smi > /dev/null; then
    # Persistence mode (1 == ENABLED); set once.
    gpu_tune -pm 1

    # Power limit in watts for RTX 3050 (adjust if needed).
    gpu_tune -pl 115

    # Exclusive-process compute mode.
    # NOTE(review): the generated start script launches gunicorn with several
    # workers — confirm they do not each need their own CUDA context.
    gpu_tune -c EXCLUSIVE_PROCESS
else
    echo "⚠️  nvidia-smi not found; skipping GPU tuning." >&2
fi

# Create CUDA cache directory
mkdir -p ~/.cache/torch/kernels

# ============================================================================
# 5. SYSTEM OPTIMIZATIONS
# ============================================================================
echo ""
echo "⚙️ Applying system optimizations..."

# Raise open-file and process limits for all users. The file is overwritten
# on every run, so repeating the installer is harmless.
sudo tee /etc/security/limits.d/99-qwen-golem.conf > /dev/null << 'EOF'
* soft nofile 65536
* hard nofile 65536
* soft nproc 32768
* hard nproc 32768
EOF

# Enable huge pages now, then persist the setting. The grep guard keeps
# repeated runs from appending the same line to /etc/sysctl.conf again.
hugepages_setting="vm.nr_hugepages=512"
sudo sysctl -w "$hugepages_setting"
if ! grep -qxF -- "$hugepages_setting" /etc/sysctl.conf 2> /dev/null; then
    echo "$hugepages_setting" | sudo tee -a /etc/sysctl.conf
fi

# Network tuning. These are applied to the running kernel only; nothing here
# persists them across a reboot.
sudo sysctl -w net.core.somaxconn=1024
sudo sysctl -w net.ipv4.tcp_max_syn_backlog=2048
sudo sysctl -w net.core.netdev_max_backlog=5000
sudo sysctl -w net.ipv4.tcp_fin_timeout=15

# ============================================================================
# 6. DOWNLOAD OPTIMIZED MODELS
# ============================================================================
echo ""
echo "📥 Downloading optimized models..."

# Create model directories (relative to the current working directory).
mkdir -p models/whisper models/piper_voices models/stable_diffusion

# Download Distil-Whisper by instantiating the model once; faster-whisper
# stores the files under download_root. The quoted here-doc keeps the shell
# from touching the Python source.
echo "Downloading Distil-Whisper..."
python - << 'PY'
from faster_whisper import WhisperModel
model = WhisperModel('distil-whisper/distil-large-v3.5-ct2',
                     device='cuda',
                     compute_type='int8_float16',
                     download_root='./models/whisper')
print('✅ Distil-Whisper downloaded')
PY

# Download Piper TTS voice (model plus its JSON config).
# Voice files live under <voice>/<quality>/ inside the locale directory of
# the piper-voices repository; the previous URLs omitted "lessac/medium/",
# so the download could not succeed.
echo "Downloading Piper voice..."
piper_voice_base="https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/lessac/medium"
wget -q -P models/piper_voices/ \
    "${piper_voice_base}/en_US-lessac-medium.onnx" \
    "${piper_voice_base}/en_US-lessac-medium.onnx.json"

echo "✅ Piper voice downloaded"

# ============================================================================
# 7. CREATE STARTUP SCRIPT
# ============================================================================
echo ""
echo "📝 Creating startup script..."

# Write start_optimized.sh into the current directory. The quoted delimiter
# copies the body verbatim: nothing in it is expanded by this installer.
cat > start_optimized.sh << 'EOF'
#!/bin/bash
# Startup script for optimized QWEN2GOLEM

echo "🚀 Starting QWEN2GOLEM with optimizations..."

# Everything below uses paths relative to the project (helper scripts,
# golem_optimizer.py, the gunicorn app module), so run from this script's
# own directory regardless of where it was invoked.
cd "$(dirname "${BASH_SOURCE[0]}")" || exit 1

# Set environment variables
export CUDA_VISIBLE_DEVICES=0
export PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512
export CUDA_LAUNCH_BLOCKING=0
export TORCH_CUDNN_V8_API_ENABLED=1
export TF32_ENABLE=1
export CUBLAS_WORKSPACE_CONFIG=:4096:8

# Start Redis if not running
if ! pgrep -x "redis-server" > /dev/null; then
    echo "Starting Redis..."
    sudo systemctl start redis-server
fi

# Refresh Gemini API keys
if [ -f "./refresh_gemini_keys.sh" ]; then
    echo "Refreshing Gemini API keys..."
    ./refresh_gemini_keys.sh &
fi

# Pre-warm models
echo "Pre-warming models..."
python -c "
import torch
torch.cuda.empty_cache()
torch.cuda.synchronize()
print('✅ GPU cache cleared and synchronized')
"

# Run the optimizer
python golem_optimizer.py

# Start the Flask server with optimizations
echo "Starting Flask server..."
gunicorn golem_flask_server:app \
    --workers 4 \
    --worker-class gevent \
    --worker-connections 1000 \
    --bind 0.0.0.0:5000 \
    --timeout 30 \
    --keep-alive 5 \
    --max-requests 10000 \
    --max-requests-jitter 1000 \
    --log-level info
EOF

chmod +x start_optimized.sh

# ============================================================================
# 8. INSTALL REQUIREMENTS UPDATE
# ============================================================================
echo ""
echo "📝 Updating requirements.txt..."

#######################################
# Append the optimization requirements to a requirements file exactly once.
# The section's first heading line doubles as a marker: when it is already
# present the file is left untouched, so re-running the installer does not
# duplicate the block.
# Arguments: $1 - requirements file to extend (default: requirements.txt)
# Returns:   0 whether the block was appended or was already present
#######################################
append_optimization_requirements() {
    local req_file=${1:-requirements.txt}
    local marker='# Performance Optimizations'

    # grep fails (silenced) when the file does not exist yet; in that case
    # the append below creates it.
    if grep -qxF -- "$marker" "$req_file" 2> /dev/null; then
        echo "Optimization requirements already present in ${req_file}; skipping."
        return 0
    fi

    cat >> "$req_file" << 'EOF'

# Performance Optimizations
xformers>=0.0.25
flash-attn>=2.5.0
bitsandbytes>=0.42.0
auto-gptq>=0.7.0
optimum>=1.16.0
accelerate>=0.27.0

# Voice Processing
faster-whisper>=1.0.0
piper-tts>=1.2.0
sounddevice>=0.4.6
soundfile>=0.12.1
librosa>=0.10.1
webrtcvad>=2.0.10
pyaudio>=0.2.14

# Caching
redis>=5.0.0
hiredis>=2.3.0

# System Monitoring
psutil>=5.9.8
py-cpuinfo>=9.0.0
nvidia-ml-py>=12.535.0
EOF
}

append_optimization_requirements

# ============================================================================
# 9. VERIFY INSTALLATION
# ============================================================================
echo ""
echo "🔍 Verifying installation..."

# Import the key libraries and report versions / GPU visibility. A failing
# import makes python exit non-zero, which stops the installer under set -e.
# The quoted here-doc keeps the shell from expanding anything in the Python
# source.
python - << 'PY'
import torch
import xformers
import faster_whisper
import diffusers
import redis

print('✅ PyTorch:', torch.__version__)
print('✅ CUDA Available:', torch.cuda.is_available())
print('✅ GPU:', torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'None')
print('✅ xFormers installed')
print('✅ Faster-Whisper installed')
print('✅ Diffusers installed')

# Test Redis. This stays best-effort (a warning, not a failure), but only
# ordinary errors are caught — a bare "except:" would also swallow
# KeyboardInterrupt/SystemExit — and the reason is reported.
try:
    r = redis.Redis(host='localhost', port=6379)
    r.ping()
    print('✅ Redis connected')
except Exception as exc:
    print('⚠️ Redis not available:', exc)
PY

# ============================================================================
# 10. FINAL SETUP
# ============================================================================
# Run the optimizer once as a performance test, then print the closing
# summary.
printf '\n%s\n' "🎯 Running performance test..."

python golem_optimizer.py

# Closing banner, printed verbatim from a quoted here-doc (leading blank
# line, the box, a blank line, then the sign-off).
cat << 'BANNER'

╔══════════════════════════════════════════════════════════╗
║   ✅ INSTALLATION COMPLETE!                              ║
║                                                          ║
║   Your system is now TURBOCHARGED for:                  ║
║   • Text responses: < 6 seconds                         ║
║   • Text + search: < 8 seconds                          ║
║   • Voice messages: < 12 seconds                        ║
║   • Image generation: < 18 seconds                      ║
║                                                          ║
║   To start: ./start_optimized.sh                        ║
╚══════════════════════════════════════════════════════════╝

🎉 Happy lightning-fast AI-ing, you magnificent creator! 🚀
BANNER