{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 8.0,
  "global_step": 1000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.08,
      "learning_rate": 4.4444444444444447e-05,
      "loss": 1.5005,
      "step": 10
    },
    {
      "epoch": 0.16,
      "learning_rate": 8.888888888888889e-05,
      "loss": 1.2648,
      "step": 20
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.00013333333333333334,
      "loss": 0.8406,
      "step": 30
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.00017777777777777779,
      "loss": 0.496,
      "step": 40
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.00019931271477663232,
      "loss": 0.2602,
      "step": 50
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.00019793814432989693,
      "loss": 0.1365,
      "step": 60
    },
    {
      "epoch": 0.56,
      "learning_rate": 0.0001965635738831615,
      "loss": 0.131,
      "step": 70
    },
    {
      "epoch": 0.64,
      "learning_rate": 0.00019518900343642613,
      "loss": 0.1219,
      "step": 80
    },
    {
      "epoch": 0.72,
      "learning_rate": 0.00019381443298969073,
      "loss": 0.1053,
      "step": 90
    },
    {
      "epoch": 0.8,
      "learning_rate": 0.00019243986254295533,
      "loss": 0.1071,
      "step": 100
    },
    {
      "epoch": 0.88,
      "learning_rate": 0.00019106529209621996,
      "loss": 0.0878,
      "step": 110
    },
    {
      "epoch": 0.96,
      "learning_rate": 0.00018969072164948454,
      "loss": 0.0863,
      "step": 120
    },
    {
      "epoch": 1.04,
      "learning_rate": 0.00018831615120274914,
      "loss": 0.0918,
      "step": 130
    },
    {
      "epoch": 1.12,
      "learning_rate": 0.00018694158075601377,
      "loss": 0.0898,
      "step": 140
    },
    {
      "epoch": 1.2,
      "learning_rate": 0.00018556701030927837,
      "loss": 0.0829,
      "step": 150
    },
    {
      "epoch": 1.28,
      "learning_rate": 0.00018419243986254294,
      "loss": 0.0721,
      "step": 160
    },
    {
      "epoch": 1.36,
      "learning_rate": 0.00018281786941580757,
      "loss": 0.0752,
      "step": 170
    },
    {
      "epoch": 1.44,
      "learning_rate": 0.00018144329896907217,
      "loss": 0.0821,
      "step": 180
    },
    {
      "epoch": 1.52,
      "learning_rate": 0.00018006872852233677,
      "loss": 0.0741,
      "step": 190
    },
    {
      "epoch": 1.6,
      "learning_rate": 0.0001786941580756014,
      "loss": 0.0665,
      "step": 200
    },
    {
      "epoch": 1.68,
      "learning_rate": 0.00017731958762886598,
      "loss": 0.0833,
      "step": 210
    },
    {
      "epoch": 1.76,
      "learning_rate": 0.00017594501718213058,
      "loss": 0.0726,
      "step": 220
    },
    {
      "epoch": 1.84,
      "learning_rate": 0.0001745704467353952,
      "loss": 0.0624,
      "step": 230
    },
    {
      "epoch": 1.92,
      "learning_rate": 0.0001731958762886598,
      "loss": 0.0716,
      "step": 240
    },
    {
      "epoch": 2.0,
      "learning_rate": 0.00017182130584192438,
      "loss": 0.0827,
      "step": 250
    },
    {
      "epoch": 2.08,
      "learning_rate": 0.000170446735395189,
      "loss": 0.0506,
      "step": 260
    },
    {
      "epoch": 2.16,
      "learning_rate": 0.00016907216494845361,
      "loss": 0.0647,
      "step": 270
    },
    {
      "epoch": 2.24,
      "learning_rate": 0.00016769759450171822,
      "loss": 0.0614,
      "step": 280
    },
    {
      "epoch": 2.32,
      "learning_rate": 0.00016632302405498285,
      "loss": 0.0528,
      "step": 290
    },
    {
      "epoch": 2.4,
      "learning_rate": 0.00016494845360824742,
      "loss": 0.0514,
      "step": 300
    },
    {
      "epoch": 2.48,
      "learning_rate": 0.00016357388316151202,
      "loss": 0.0635,
      "step": 310
    },
    {
      "epoch": 2.56,
      "learning_rate": 0.00016219931271477665,
      "loss": 0.0552,
      "step": 320
    },
    {
      "epoch": 2.64,
      "learning_rate": 0.00016082474226804125,
      "loss": 0.06,
      "step": 330
    },
    {
      "epoch": 2.72,
      "learning_rate": 0.00015945017182130585,
      "loss": 0.0611,
      "step": 340
    },
    {
      "epoch": 2.8,
      "learning_rate": 0.00015807560137457046,
      "loss": 0.0667,
      "step": 350
    },
    {
      "epoch": 2.88,
      "learning_rate": 0.00015670103092783506,
      "loss": 0.0669,
      "step": 360
    },
    {
      "epoch": 2.96,
      "learning_rate": 0.00015532646048109966,
      "loss": 0.0589,
      "step": 370
    },
    {
      "epoch": 3.04,
      "learning_rate": 0.0001539518900343643,
      "loss": 0.0516,
      "step": 380
    },
    {
      "epoch": 3.12,
      "learning_rate": 0.00015257731958762886,
      "loss": 0.0415,
      "step": 390
    },
    {
      "epoch": 3.2,
      "learning_rate": 0.00015120274914089346,
      "loss": 0.043,
      "step": 400
    },
    {
      "epoch": 3.28,
      "learning_rate": 0.0001498281786941581,
      "loss": 0.0449,
      "step": 410
    },
    {
      "epoch": 3.36,
      "learning_rate": 0.0001484536082474227,
      "loss": 0.0467,
      "step": 420
    },
    {
      "epoch": 3.44,
      "learning_rate": 0.0001470790378006873,
      "loss": 0.0442,
      "step": 430
    },
    {
      "epoch": 3.52,
      "learning_rate": 0.0001457044673539519,
      "loss": 0.0501,
      "step": 440
    },
    {
      "epoch": 3.6,
      "learning_rate": 0.0001443298969072165,
      "loss": 0.0468,
      "step": 450
    },
    {
      "epoch": 3.68,
      "learning_rate": 0.0001429553264604811,
      "loss": 0.0503,
      "step": 460
    },
    {
      "epoch": 3.76,
      "learning_rate": 0.00014158075601374573,
      "loss": 0.0503,
      "step": 470
    },
    {
      "epoch": 3.84,
      "learning_rate": 0.0001402061855670103,
      "loss": 0.0477,
      "step": 480
    },
    {
      "epoch": 3.92,
      "learning_rate": 0.0001388316151202749,
      "loss": 0.0467,
      "step": 490
    },
    {
      "epoch": 4.0,
      "learning_rate": 0.00013745704467353953,
      "loss": 0.0525,
      "step": 500
    },
    {
      "epoch": 4.08,
      "learning_rate": 0.00013608247422680414,
      "loss": 0.0434,
      "step": 510
    },
    {
      "epoch": 4.16,
      "learning_rate": 0.00013470790378006874,
      "loss": 0.0371,
      "step": 520
    },
    {
      "epoch": 4.24,
      "learning_rate": 0.00013333333333333334,
      "loss": 0.0393,
      "step": 530
    },
    {
      "epoch": 4.32,
      "learning_rate": 0.00013195876288659794,
      "loss": 0.0378,
      "step": 540
    },
    {
      "epoch": 4.4,
      "learning_rate": 0.00013058419243986254,
      "loss": 0.0358,
      "step": 550
    },
    {
      "epoch": 4.48,
      "learning_rate": 0.00012920962199312717,
      "loss": 0.04,
      "step": 560
    },
    {
      "epoch": 4.56,
      "learning_rate": 0.00012783505154639175,
      "loss": 0.0355,
      "step": 570
    },
    {
      "epoch": 4.64,
      "learning_rate": 0.00012646048109965635,
      "loss": 0.0424,
      "step": 580
    },
    {
      "epoch": 4.72,
      "learning_rate": 0.00012508591065292098,
      "loss": 0.0411,
      "step": 590
    },
    {
      "epoch": 4.8,
      "learning_rate": 0.00012371134020618558,
      "loss": 0.0374,
      "step": 600
    },
    {
      "epoch": 4.88,
      "learning_rate": 0.00012233676975945018,
      "loss": 0.0402,
      "step": 610
    },
    {
      "epoch": 4.96,
      "learning_rate": 0.00012096219931271477,
      "loss": 0.0417,
      "step": 620
    },
    {
      "epoch": 5.04,
      "learning_rate": 0.00011958762886597938,
      "loss": 0.0353,
      "step": 630
    },
    {
      "epoch": 5.12,
      "learning_rate": 0.000118213058419244,
      "loss": 0.0328,
      "step": 640
    },
    {
      "epoch": 5.2,
      "learning_rate": 0.0001168384879725086,
      "loss": 0.0356,
      "step": 650
    },
    {
      "epoch": 5.28,
      "learning_rate": 0.00011546391752577319,
      "loss": 0.0354,
      "step": 660
    },
    {
      "epoch": 5.36,
      "learning_rate": 0.0001140893470790378,
      "loss": 0.0326,
      "step": 670
    },
    {
      "epoch": 5.44,
      "learning_rate": 0.0001127147766323024,
      "loss": 0.0358,
      "step": 680
    },
    {
      "epoch": 5.52,
      "learning_rate": 0.00011134020618556702,
      "loss": 0.0355,
      "step": 690
    },
    {
      "epoch": 5.6,
      "learning_rate": 0.00010996563573883164,
      "loss": 0.0342,
      "step": 700
    },
    {
      "epoch": 5.68,
      "learning_rate": 0.00010859106529209621,
      "loss": 0.0335,
      "step": 710
    },
    {
      "epoch": 5.76,
      "learning_rate": 0.00010721649484536083,
      "loss": 0.0362,
      "step": 720
    },
    {
      "epoch": 5.84,
      "learning_rate": 0.00010584192439862544,
      "loss": 0.0343,
      "step": 730
    },
    {
      "epoch": 5.92,
      "learning_rate": 0.00010446735395189004,
      "loss": 0.0329,
      "step": 740
    },
    {
      "epoch": 6.0,
      "learning_rate": 0.00010309278350515463,
      "loss": 0.0313,
      "step": 750
    },
    {
      "epoch": 6.08,
      "learning_rate": 0.00010171821305841925,
      "loss": 0.028,
      "step": 760
    },
    {
      "epoch": 6.16,
      "learning_rate": 0.00010034364261168385,
      "loss": 0.0317,
      "step": 770
    },
    {
      "epoch": 6.24,
      "learning_rate": 9.896907216494846e-05,
      "loss": 0.029,
      "step": 780
    },
    {
      "epoch": 6.32,
      "learning_rate": 9.759450171821306e-05,
      "loss": 0.0288,
      "step": 790
    },
    {
      "epoch": 6.4,
      "learning_rate": 9.621993127147767e-05,
      "loss": 0.0281,
      "step": 800
    },
    {
      "epoch": 6.48,
      "learning_rate": 9.484536082474227e-05,
      "loss": 0.0309,
      "step": 810
    },
    {
      "epoch": 6.56,
      "learning_rate": 9.347079037800688e-05,
      "loss": 0.0334,
      "step": 820
    },
    {
      "epoch": 6.64,
      "learning_rate": 9.209621993127147e-05,
      "loss": 0.0292,
      "step": 830
    },
    {
      "epoch": 6.72,
      "learning_rate": 9.072164948453609e-05,
      "loss": 0.0331,
      "step": 840
    },
    {
      "epoch": 6.8,
      "learning_rate": 8.93470790378007e-05,
      "loss": 0.0304,
      "step": 850
    },
    {
      "epoch": 6.88,
      "learning_rate": 8.797250859106529e-05,
      "loss": 0.0308,
      "step": 860
    },
    {
      "epoch": 6.96,
      "learning_rate": 8.65979381443299e-05,
      "loss": 0.0318,
      "step": 870
    },
    {
      "epoch": 7.04,
      "learning_rate": 8.52233676975945e-05,
      "loss": 0.0257,
      "step": 880
    },
    {
      "epoch": 7.12,
      "learning_rate": 8.384879725085911e-05,
      "loss": 0.0266,
      "step": 890
    },
    {
      "epoch": 7.2,
      "learning_rate": 8.247422680412371e-05,
      "loss": 0.0251,
      "step": 900
    },
    {
      "epoch": 7.28,
      "learning_rate": 8.109965635738833e-05,
      "loss": 0.0269,
      "step": 910
    },
    {
      "epoch": 7.36,
      "learning_rate": 7.972508591065293e-05,
      "loss": 0.0296,
      "step": 920
    },
    {
      "epoch": 7.44,
      "learning_rate": 7.835051546391753e-05,
      "loss": 0.0285,
      "step": 930
    },
    {
      "epoch": 7.52,
      "learning_rate": 7.697594501718214e-05,
      "loss": 0.0286,
      "step": 940
    },
    {
      "epoch": 7.6,
      "learning_rate": 7.560137457044673e-05,
      "loss": 0.024,
      "step": 950
    },
    {
      "epoch": 7.68,
      "learning_rate": 7.422680412371135e-05,
      "loss": 0.0267,
      "step": 960
    },
    {
      "epoch": 7.76,
      "learning_rate": 7.285223367697595e-05,
      "loss": 0.0296,
      "step": 970
    },
    {
      "epoch": 7.84,
      "learning_rate": 7.147766323024055e-05,
      "loss": 0.0265,
      "step": 980
    },
    {
      "epoch": 7.92,
      "learning_rate": 7.010309278350515e-05,
      "loss": 0.0282,
      "step": 990
    },
    {
      "epoch": 8.0,
      "learning_rate": 6.872852233676977e-05,
      "loss": 0.028,
      "step": 1000
    }
  ],
  "max_steps": 1500,
  "num_train_epochs": 12,
  "total_flos": 4.91514246463488e+17,
  "trial_name": null,
  "trial_params": null
}