StriMap / fix_lfs_all.sh
cao
fix: add wget for dev mode
672c4ea
raw
history blame
3.72 kB
#!/bin/bash
# ============================================
# 自动检测并修复 Git 大文件 / LFS 问题
# 作者: Kai(StriMap 项目)
# 功能:
# 1. 自动安装 & 初始化 Git LFS
# 2. 扫描 >10MB 文件,自动 LFS 追踪
# 3. 重写历史并强制推送
# ============================================
# Abort the whole script as soon as any unguarded command fails.
set -e

# Step 1: verify that the Git LFS extension is usable, then initialise it.
echo "🚀 [1/7] 检查 Git LFS..."
# A non-zero exit from `git lfs` is taken to mean the extension is not
# installed; its output (stdout and stderr) is discarded either way.
git lfs >/dev/null 2>&1 || {
  echo "❌ Git LFS 未安装,请先运行:conda install -c conda-forge git-lfs -y"
  exit 1
}
git lfs install
echo "✅ Git LFS 初始化完成。"
# --------------------------------------------
# 2. Locate the repository root and switch to it
# --------------------------------------------
# ROOT_DIR stays empty when `git rev-parse` fails (its error text is hidden),
# which is treated as "the current directory is not a Git repository".
ROOT_DIR="$(git rev-parse --show-toplevel 2>/dev/null || true)"
[[ -n "$ROOT_DIR" ]] || {
  echo "❌ 当前目录不是 Git 仓库,请进入正确的 repo 根目录。"
  exit 1
}
# All later steps use paths relative to the repository root.
cd "$ROOT_DIR"
echo "📁 仓库根目录: $ROOT_DIR"
# --------------------------------------------
# 3. Track common large-file extensions with Git LFS
# --------------------------------------------
echo "🚀 [2/7] 添加常见文件类型到 Git LFS..."
# Glob patterns handed to `git lfs track` in one call.
lfs_patterns=(
  "*.pt" "*.pth" "*.bin" "*.npy" "*.npz" "*.pkl" "*.h5"
  "*.ckpt" "*.zarr" "*.fasta" "*.fa" "*.csv" "*.pdb"
)
git lfs track "${lfs_patterns[@]}"
git add .gitattributes
# `|| true` keeps a failing commit (e.g. nothing to commit) from aborting
# the script under `set -e`.
git commit -m "Auto-track common large file types with Git LFS" || true
# --------------------------------------------
# 4. Scan the working tree for files larger than 10 MiB
# --------------------------------------------
echo "🔍 [3/7] 扫描当前工作区中超过 10MB 的文件..."
# Prune every `.git` entry so Git's own pack files and the local LFS object
# cache are never listed: they are not working-tree files and must not be
# written into .gitattributes.
# -printf '%P\n' (GNU find) prints each path relative to the start directory.
# The list is kept in large_files.txt so it can be inspected afterwards.
find . -name .git -prune -o -type f -size +10M -printf '%P\n' | sort -u > large_files.txt
if [[ ! -s large_files.txt ]]; then
  echo "✅ 当前工作区中没有超过 10MB 的文件。"
else
  echo "⚠️ 以下文件超过 10MB,将自动加入 LFS 追踪:"
  cat large_files.txt
  # IFS= and -r keep leading/trailing blanks and backslashes in file names
  # intact, so the exact path is what gets tracked.
  while IFS= read -r f; do
    git lfs track "$f"
  done < large_files.txt
  git add .gitattributes
  # `|| true` keeps a failing commit (e.g. nothing to commit) from aborting
  # the script under `set -e`.
  git commit -m "Track detected large files with Git LFS" || true
fi
# --------------------------------------------
# 5. Rewrite Git history so old versions become LFS pointers as well
# --------------------------------------------
echo "🚀 [4/7] 重写历史(git lfs migrate import --everything)..."
# Build the comma-separated --include list ("*.pt,*.pth,...") from the
# extension names; the trailing comma left by printf is stripped.
migrate_exts=(pt pth bin npy npz pkl h5 ckpt zarr fasta fa csv pdb)
include_list="$(printf '*.%s,' "${migrate_exts[@]}")"
include_list="${include_list%,}"
# NOTE(review): only these extension patterns are passed to the migration.
# Files that the previous step tracked by explicit path are not part of
# --include — confirm whether they should be migrated too.
git lfs migrate import --include="$include_list" --everything
# --------------------------------------------
# 6. Look for large objects that are still stored directly in Git
# --------------------------------------------
echo "🔍 [5/7] 检查是否仍有历史大文件未迁移..."
# allfiles.txt: one "<object-id> <path>" line per object reachable from any ref.
git rev-list --objects --all > allfiles.txt
# A single `git cat-file --batch-check` process reports type and size for every
# object, instead of forking `git cat-file -s` once per object.
# %(rest) echoes whatever followed the object id on the input line, i.e. the
# path, unmodified (spaces and backslashes included).
# awk keeps blobs above 10,000,000 bytes and prints "<size> bytes - <path>".
# Lines for unknown objects ("<id> missing") never match `$1 == "blob"`.
# The report is advisory, so `|| true` tolerates a failure of this pipeline.
git cat-file --batch-check='%(objecttype) %(objectsize) %(rest)' < allfiles.txt \
  | awk '$1 == "blob" && $2 + 0 > 10000000 {
      size = $2
      sub(/^[^ ]+ [^ ]+ /, "")
      print size " bytes - " $0
    }' > remaining_large.txt || true
if [[ -s remaining_large.txt ]]; then
  echo "⚠️ 以下文件仍超过 10MB(可能未被 LFS 管理):"
  cat remaining_large.txt
else
  echo "✅ 所有大文件已迁移至 LFS。"
fi
# --------------------------------------------
# 7. Force-push the rewritten history to the remote
# --------------------------------------------
echo "🚀 [6/7] 准备推送到 Hugging Face..."
echo "⚠️ 这将使用 --force 推送(会覆盖远端历史)。"
# -r keeps the answer literal (no backslash processing), so only an exact
# "yes" confirms the destructive push.
# A failed read (EOF / no interactive stdin) is reported on stderr instead of
# killing the script silently through `set -e`; nothing is pushed in that case.
if ! read -r -p "是否继续?(yes/no): " CONFIRM; then
  echo "❌ 无法读取确认输入,已取消推送。" >&2
  exit 1
fi
if [[ "$CONFIRM" != "yes" ]]; then
  echo "❌ 用户取消推送。"
  exit 0
fi
# Branches first, then tags; both overwrite what is on `origin`.
git push origin --force --all
git push origin --force --tags
echo "✅ [7/7] 推送完成。请前往 Hugging Face 页面确认 'Stored with Git LFS' 标记。"
# --------------------------------------------
# Done
# --------------------------------------------
echo "🎉 修复流程完成!"
echo "如果下次新增大文件,请重新运行本脚本。"