---
# Model architecture and training-run settings.
#
# NOTE(review): this file had been collapsed onto a single line, which is not
# parseable YAML. The nesting below was reconstructed from the sorted key
# order and the repeated `name` key; confirm it against the tool that
# generated or consumes this file.

# Architecture settings.
arch:
  H_cycles: 2
  H_layers: 4
  L_cycles: 2
  L_layers: 4
  expansion: 4
  halt_exploration_prob: 0.1
  halt_max_steps: 16
  hidden_size: 512
  # Loss settings nested under the architecture.
  loss:
    loss_type: stablemax_cross_entropy
    # presumably a `module@ClassName` reference; verify against the loader
    name: losses@ACTLossHead
  # presumably a `module@ClassName` reference; verify against the loader
  name: hrm.hrm_act_v1@HierarchicalReasoningModel_ACTV1
  num_heads: 8
  pos_encodings: rope
  puzzle_emb_ndim: 512

# Top-level run settings.
beta1: 0.9
beta2: 0.95
checkpoint_every_eval: true
# The value contains a literal space; quoted so that it is unmistakable.
checkpoint_path: 'checkpoints/Sudoku-extreme-1k-aug-1000/HierarchicalReasoningModel_ACTV1 manipulative-peacock'
data_path: data/sudoku-extreme-1k-aug-1000
epochs: 40000
eval_interval: 4000
eval_save_outputs: []
global_batch_size: 768
lr: 0.0001
lr_min_ratio: 1.0
lr_warmup_steps: 2000
project_name: Sudoku-extreme-1k-aug-1000
puzzle_emb_lr: 0.0001
puzzle_emb_weight_decay: 1.0
# The value contains a literal space; quoted so that it is unmistakable.
run_name: 'HierarchicalReasoningModel_ACTV1 manipulative-peacock'
seed: 0
weight_decay: 1.0