# UncheatableEval / mask_data.py
# Page header captured along with the file (not part of the program):
#   "Jellyfish042's picture" / "Long Context Page" / c807fbd
import json
import os
import sys
import argparse
from pathlib import Path
def process_json_file(input_file_path, output_dir):
    """Copy one JSON file into ``output_dir`` with its ``data_path`` masked.

    The file is parsed as UTF-8 JSON. If the top-level object has a non-empty
    ``data_path`` entry, that value is replaced by its final path component
    (the bare filename). The document is then written, indented by four
    spaces and without ASCII-escaping, to ``output_dir`` under the same file
    name as the input. Progress and errors are reported with ``print``.

    Both forward slashes and backslashes are treated as separators, so a
    Windows-style path is masked even when the script runs on a POSIX host
    (where ``os.path.basename`` alone would return the whole string).

    Args:
        input_file_path: Path of the JSON file to read.
        output_dir: Directory to write the processed copy into; created if
            it does not exist.

    Returns:
        True if the file was written, False if parsing or processing failed
        (the error is printed, not raised).
    """
    try:
        with open(input_file_path, "r", encoding="utf-8") as f:
            data = json.load(f)

        if "data_path" in data and data["data_path"]:
            original_path = data["data_path"]
            if not isinstance(original_path, str):
                # Still reported through the generic handler below, but with
                # a message that names the offending field.
                raise TypeError(
                    f"data_path must be a string, got {type(original_path).__name__}"
                )
            # Normalize backslashes first: on POSIX, os.path.basename does not
            # split on "\", which would leave a Windows path fully exposed.
            filename = os.path.basename(original_path.replace("\\", "/"))
            data["data_path"] = filename
            print(f"Processing file: {os.path.basename(input_file_path)}")
            print(f" Original path: {original_path}")
            print(f" Replaced with: {filename}")

        os.makedirs(output_dir, exist_ok=True)
        output_file_path = os.path.join(output_dir, os.path.basename(input_file_path))
        with open(output_file_path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=4, ensure_ascii=False)
        print(f" Saved to: {output_file_path}\n")
        return True
    except json.JSONDecodeError as e:
        print(f"Error: Failed to parse JSON file {input_file_path}: {e}\n")
        return False
    except Exception as e:
        # Best-effort batch tool: report the failure and let the caller
        # continue with the remaining files.
        print(f"Error: Failed to process file {input_file_path}: {e}\n")
        return False
def main():
    """Command-line entry point: mask ``data_path`` in each JSON file of a directory.

    Takes two positional arguments, ``input_dir`` and ``output_dir``. Every
    ``*.json`` file directly inside ``input_dir`` is passed to
    ``process_json_file`` together with ``output_dir``, and a summary of how
    many files succeeded is printed at the end.

    Exits with status 1 if ``input_dir`` does not exist or is not a
    directory, and with status 0 (after a warning) if it contains no
    ``*.json`` files.
    """
    arg_parser = argparse.ArgumentParser(description="Process all JSON files in a directory, replacing data_path field paths with filenames")
    arg_parser.add_argument("input_dir", type=str, help="Input directory path containing JSON files")
    arg_parser.add_argument("output_dir", type=str, help="Output directory path for processed JSON files")
    cli = arg_parser.parse_args()

    source_dir = Path(cli.input_dir)
    target_dir = Path(cli.output_dir)

    # Guard clauses: refuse to continue without a usable input directory.
    if not source_dir.exists():
        print(f"Error: Input directory does not exist: {source_dir}")
        sys.exit(1)
    if not source_dir.is_dir():
        print(f"Error: Input path is not a directory: {source_dir}")
        sys.exit(1)

    # Non-recursive: only files directly inside the input directory match.
    candidates = list(source_dir.glob("*.json"))
    total = len(candidates)
    if total == 0:
        print(f"Warning: No JSON files found in directory {source_dir}")
        sys.exit(0)
    print(f"Found {total} JSON files\n")

    # Each file is processed in glob order; failures are counted, not fatal.
    succeeded = sum(1 for candidate in candidates if process_json_file(candidate, target_dir))
    print(f"\nProcessing complete: Successfully processed {succeeded}/{total} files")
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()