| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.4287245444801715, | |
| "eval_steps": 100, | |
| "global_step": 2000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.007145409074669525, | |
| "grad_norm": 4.4086809158325195, | |
| "learning_rate": 2.3809523809523808e-06, | |
| "loss": 1.0969, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.01429081814933905, | |
| "grad_norm": 5.687011241912842, | |
| "learning_rate": 4.7619047619047615e-06, | |
| "loss": 1.0795, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.021436227224008574, | |
| "grad_norm": 1.976590633392334, | |
| "learning_rate": 7.142857142857143e-06, | |
| "loss": 0.7536, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.0285816362986781, | |
| "grad_norm": 3.1355409622192383, | |
| "learning_rate": 9.523809523809523e-06, | |
| "loss": 0.5564, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.03572704537334762, | |
| "grad_norm": 2.6710309982299805, | |
| "learning_rate": 1.1904761904761905e-05, | |
| "loss": 0.623, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.04287245444801715, | |
| "grad_norm": 2.8567938804626465, | |
| "learning_rate": 1.4285714285714285e-05, | |
| "loss": 0.5322, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.050017863522686674, | |
| "grad_norm": 3.4388861656188965, | |
| "learning_rate": 1.6666666666666667e-05, | |
| "loss": 0.5102, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.0571632725973562, | |
| "grad_norm": 3.093275308609009, | |
| "learning_rate": 1.9047619047619046e-05, | |
| "loss": 0.568, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.06430868167202572, | |
| "grad_norm": 2.3798677921295166, | |
| "learning_rate": 2.1428571428571428e-05, | |
| "loss": 0.4883, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.07145409074669525, | |
| "grad_norm": 2.846259117126465, | |
| "learning_rate": 2.380952380952381e-05, | |
| "loss": 0.417, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.07145409074669525, | |
| "eval_news_finetune_val_loss": 0.48679304122924805, | |
| "eval_news_finetune_val_runtime": 1001.9158, | |
| "eval_news_finetune_val_samples_per_second": 1.397, | |
| "eval_news_finetune_val_steps_per_second": 1.397, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.07859949982136477, | |
| "grad_norm": 1.9387887716293335, | |
| "learning_rate": 2.6190476190476192e-05, | |
| "loss": 0.4595, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.0857449088960343, | |
| "grad_norm": 2.3232853412628174, | |
| "learning_rate": 2.857142857142857e-05, | |
| "loss": 0.4658, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.09289031797070382, | |
| "grad_norm": 2.813093423843384, | |
| "learning_rate": 3.095238095238095e-05, | |
| "loss": 0.4122, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.10003572704537335, | |
| "grad_norm": 1.9588465690612793, | |
| "learning_rate": 3.3333333333333335e-05, | |
| "loss": 0.4878, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.10718113612004287, | |
| "grad_norm": 1.4838117361068726, | |
| "learning_rate": 3.571428571428572e-05, | |
| "loss": 0.4168, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.1143265451947124, | |
| "grad_norm": 3.020738124847412, | |
| "learning_rate": 3.809523809523809e-05, | |
| "loss": 0.4298, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.12147195426938193, | |
| "grad_norm": 2.097656011581421, | |
| "learning_rate": 4.047619047619048e-05, | |
| "loss": 0.4413, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.12861736334405144, | |
| "grad_norm": 1.6332950592041016, | |
| "learning_rate": 4.2857142857142856e-05, | |
| "loss": 0.3734, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.13576277241872098, | |
| "grad_norm": 2.1570417881011963, | |
| "learning_rate": 4.523809523809524e-05, | |
| "loss": 0.4015, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.1429081814933905, | |
| "grad_norm": 1.6941479444503784, | |
| "learning_rate": 4.761904761904762e-05, | |
| "loss": 0.4411, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.1429081814933905, | |
| "eval_news_finetune_val_loss": 0.4338369369506836, | |
| "eval_news_finetune_val_runtime": 1002.1695, | |
| "eval_news_finetune_val_samples_per_second": 1.397, | |
| "eval_news_finetune_val_steps_per_second": 1.397, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.15005359056806003, | |
| "grad_norm": 2.3582301139831543, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3697, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.15719899964272954, | |
| "grad_norm": 2.0517632961273193, | |
| "learning_rate": 5.2380952380952384e-05, | |
| "loss": 0.4076, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.16434440871739908, | |
| "grad_norm": 1.3338748216629028, | |
| "learning_rate": 5.4761904761904766e-05, | |
| "loss": 0.3307, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.1714898177920686, | |
| "grad_norm": 3.0515363216400146, | |
| "learning_rate": 5.714285714285714e-05, | |
| "loss": 0.4227, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.17863522686673813, | |
| "grad_norm": 2.4899113178253174, | |
| "learning_rate": 5.9523809523809524e-05, | |
| "loss": 0.4689, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.18578063594140765, | |
| "grad_norm": 1.6197255849838257, | |
| "learning_rate": 6.19047619047619e-05, | |
| "loss": 0.3618, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.19292604501607716, | |
| "grad_norm": 1.654628872871399, | |
| "learning_rate": 6.428571428571429e-05, | |
| "loss": 0.4668, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.2000714540907467, | |
| "grad_norm": 1.6470831632614136, | |
| "learning_rate": 6.666666666666667e-05, | |
| "loss": 0.3525, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.2072168631654162, | |
| "grad_norm": 2.640536308288574, | |
| "learning_rate": 6.904761904761905e-05, | |
| "loss": 0.3707, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.21436227224008575, | |
| "grad_norm": 2.3426971435546875, | |
| "learning_rate": 7.142857142857143e-05, | |
| "loss": 0.4461, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.21436227224008575, | |
| "eval_news_finetune_val_loss": 0.40391305088996887, | |
| "eval_news_finetune_val_runtime": 1002.5797, | |
| "eval_news_finetune_val_samples_per_second": 1.396, | |
| "eval_news_finetune_val_steps_per_second": 1.396, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.22150768131475526, | |
| "grad_norm": 1.0351321697235107, | |
| "learning_rate": 7.380952380952382e-05, | |
| "loss": 0.3439, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.2286530903894248, | |
| "grad_norm": 3.062483549118042, | |
| "learning_rate": 7.619047619047618e-05, | |
| "loss": 0.4492, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.2357984994640943, | |
| "grad_norm": 2.095825672149658, | |
| "learning_rate": 7.857142857142858e-05, | |
| "loss": 0.3399, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.24294390853876385, | |
| "grad_norm": 1.700642704963684, | |
| "learning_rate": 8.095238095238096e-05, | |
| "loss": 0.4336, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.2500893176134334, | |
| "grad_norm": 1.6802127361297607, | |
| "learning_rate": 8.333333333333334e-05, | |
| "loss": 0.3628, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.2572347266881029, | |
| "grad_norm": 1.1725817918777466, | |
| "learning_rate": 8.571428571428571e-05, | |
| "loss": 0.4113, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.2643801357627724, | |
| "grad_norm": 1.0182325839996338, | |
| "learning_rate": 8.80952380952381e-05, | |
| "loss": 0.4009, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.27152554483744196, | |
| "grad_norm": 2.5762252807617188, | |
| "learning_rate": 9.047619047619048e-05, | |
| "loss": 0.3399, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.27867095391211144, | |
| "grad_norm": 1.5393809080123901, | |
| "learning_rate": 9.285714285714286e-05, | |
| "loss": 0.326, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.285816362986781, | |
| "grad_norm": 2.3259921073913574, | |
| "learning_rate": 9.523809523809524e-05, | |
| "loss": 0.4228, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.285816362986781, | |
| "eval_news_finetune_val_loss": 0.39322975277900696, | |
| "eval_news_finetune_val_runtime": 1002.8865, | |
| "eval_news_finetune_val_samples_per_second": 1.396, | |
| "eval_news_finetune_val_steps_per_second": 1.396, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.2929617720614505, | |
| "grad_norm": 0.9278184771537781, | |
| "learning_rate": 9.761904761904762e-05, | |
| "loss": 0.3184, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.30010718113612006, | |
| "grad_norm": 1.4571782350540161, | |
| "learning_rate": 0.0001, | |
| "loss": 0.473, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.30725259021078954, | |
| "grad_norm": 1.6199829578399658, | |
| "learning_rate": 9.99982704095424e-05, | |
| "loss": 0.392, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.3143979992854591, | |
| "grad_norm": 1.302309513092041, | |
| "learning_rate": 9.999308175782893e-05, | |
| "loss": 0.3824, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.3215434083601286, | |
| "grad_norm": 1.438289761543274, | |
| "learning_rate": 9.998443440382927e-05, | |
| "loss": 0.4001, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.32868881743479816, | |
| "grad_norm": 1.7557189464569092, | |
| "learning_rate": 9.997232894579868e-05, | |
| "loss": 0.4144, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.33583422650946765, | |
| "grad_norm": 0.9362027645111084, | |
| "learning_rate": 9.995676622123655e-05, | |
| "loss": 0.3094, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.3429796355841372, | |
| "grad_norm": 1.7850221395492554, | |
| "learning_rate": 9.993774730682845e-05, | |
| "loss": 0.2966, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.35012504465880673, | |
| "grad_norm": 1.705842137336731, | |
| "learning_rate": 9.991527351837174e-05, | |
| "loss": 0.3274, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.35727045373347627, | |
| "grad_norm": 1.0722746849060059, | |
| "learning_rate": 9.988934641068436e-05, | |
| "loss": 0.4301, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.35727045373347627, | |
| "eval_news_finetune_val_loss": 0.3787713646888733, | |
| "eval_news_finetune_val_runtime": 1002.8588, | |
| "eval_news_finetune_val_samples_per_second": 1.396, | |
| "eval_news_finetune_val_steps_per_second": 1.396, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.36441586280814575, | |
| "grad_norm": 1.282714605331421, | |
| "learning_rate": 9.985996777749747e-05, | |
| "loss": 0.3636, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.3715612718828153, | |
| "grad_norm": 2.0360989570617676, | |
| "learning_rate": 9.982713965133122e-05, | |
| "loss": 0.4467, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.37870668095748483, | |
| "grad_norm": 1.7432626485824585, | |
| "learning_rate": 9.979086430335417e-05, | |
| "loss": 0.3875, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.3858520900321543, | |
| "grad_norm": 1.6053438186645508, | |
| "learning_rate": 9.975114424322609e-05, | |
| "loss": 0.3646, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.39299749910682386, | |
| "grad_norm": 1.2323070764541626, | |
| "learning_rate": 9.970798221892452e-05, | |
| "loss": 0.353, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.4001429081814934, | |
| "grad_norm": 1.16932213306427, | |
| "learning_rate": 9.966138121655445e-05, | |
| "loss": 0.331, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.40728831725616294, | |
| "grad_norm": 1.8134998083114624, | |
| "learning_rate": 9.961134446014184e-05, | |
| "loss": 0.3132, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.4144337263308324, | |
| "grad_norm": 1.4292124509811401, | |
| "learning_rate": 9.955787541141055e-05, | |
| "loss": 0.3017, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.42157913540550196, | |
| "grad_norm": 1.4605034589767456, | |
| "learning_rate": 9.950097776954284e-05, | |
| "loss": 0.3596, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.4287245444801715, | |
| "grad_norm": 1.2365972995758057, | |
| "learning_rate": 9.944065547092345e-05, | |
| "loss": 0.3399, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.4287245444801715, | |
| "eval_news_finetune_val_loss": 0.36549311876296997, | |
| "eval_news_finetune_val_runtime": 1002.8044, | |
| "eval_news_finetune_val_samples_per_second": 1.396, | |
| "eval_news_finetune_val_steps_per_second": 1.396, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.43586995355484104, | |
| "grad_norm": 1.0590678453445435, | |
| "learning_rate": 9.937691268886725e-05, | |
| "loss": 0.3747, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.4430153626295105, | |
| "grad_norm": 0.9111473560333252, | |
| "learning_rate": 9.930975383333056e-05, | |
| "loss": 0.2868, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.45016077170418006, | |
| "grad_norm": 2.0456018447875977, | |
| "learning_rate": 9.923918355060599e-05, | |
| "loss": 0.3289, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.4573061807788496, | |
| "grad_norm": 1.5998501777648926, | |
| "learning_rate": 9.916520672300107e-05, | |
| "loss": 0.3664, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.4644515898535191, | |
| "grad_norm": 1.0773181915283203, | |
| "learning_rate": 9.908782846850037e-05, | |
| "loss": 0.3432, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.4715969989281886, | |
| "grad_norm": 1.244042158126831, | |
| "learning_rate": 9.900705414041154e-05, | |
| "loss": 0.3242, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.47874240800285817, | |
| "grad_norm": 1.8120310306549072, | |
| "learning_rate": 9.892288932699484e-05, | |
| "loss": 0.317, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.4858878170775277, | |
| "grad_norm": 0.7863224148750305, | |
| "learning_rate": 9.883533985107663e-05, | |
| "loss": 0.322, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.4930332261521972, | |
| "grad_norm": 1.223832130432129, | |
| "learning_rate": 9.874441176964642e-05, | |
| "loss": 0.343, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.5001786352268668, | |
| "grad_norm": 0.9870743155479431, | |
| "learning_rate": 9.865011137343787e-05, | |
| "loss": 0.3278, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.5001786352268668, | |
| "eval_news_finetune_val_loss": 0.35386842489242554, | |
| "eval_news_finetune_val_runtime": 1003.4109, | |
| "eval_news_finetune_val_samples_per_second": 1.395, | |
| "eval_news_finetune_val_steps_per_second": 1.395, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.5073240443015362, | |
| "grad_norm": 1.3699963092803955, | |
| "learning_rate": 9.85524451864936e-05, | |
| "loss": 0.3902, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.5144694533762058, | |
| "grad_norm": 1.7188071012496948, | |
| "learning_rate": 9.845141996571384e-05, | |
| "loss": 0.369, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.5216148624508753, | |
| "grad_norm": 0.4889034628868103, | |
| "learning_rate": 9.834704270038888e-05, | |
| "loss": 0.3174, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.5287602715255448, | |
| "grad_norm": 0.8782143592834473, | |
| "learning_rate": 9.823932061171561e-05, | |
| "loss": 0.3501, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.5359056806002144, | |
| "grad_norm": 2.4089126586914062, | |
| "learning_rate": 9.812826115229789e-05, | |
| "loss": 0.3292, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.5430510896748839, | |
| "grad_norm": 1.6382787227630615, | |
| "learning_rate": 9.801387200563096e-05, | |
| "loss": 0.459, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.5501964987495535, | |
| "grad_norm": 1.443916916847229, | |
| "learning_rate": 9.789616108556992e-05, | |
| "loss": 0.3409, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.5573419078242229, | |
| "grad_norm": 1.632278323173523, | |
| "learning_rate": 9.77751365357821e-05, | |
| "loss": 0.281, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.5644873168988924, | |
| "grad_norm": 2.1452109813690186, | |
| "learning_rate": 9.765080672918374e-05, | |
| "loss": 0.3511, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.571632725973562, | |
| "grad_norm": 1.2721842527389526, | |
| "learning_rate": 9.752318026736078e-05, | |
| "loss": 0.2298, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.571632725973562, | |
| "eval_news_finetune_val_loss": 0.34554028511047363, | |
| "eval_news_finetune_val_runtime": 1003.3342, | |
| "eval_news_finetune_val_samples_per_second": 1.395, | |
| "eval_news_finetune_val_steps_per_second": 1.395, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.5787781350482315, | |
| "grad_norm": 2.5264174938201904, | |
| "learning_rate": 9.739226597997359e-05, | |
| "loss": 0.3214, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.585923544122901, | |
| "grad_norm": 1.4553183317184448, | |
| "learning_rate": 9.725807292414629e-05, | |
| "loss": 0.2697, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.5930689531975706, | |
| "grad_norm": 2.2111873626708984, | |
| "learning_rate": 9.712061038384002e-05, | |
| "loss": 0.3315, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.6002143622722401, | |
| "grad_norm": 1.4308302402496338, | |
| "learning_rate": 9.697988786921071e-05, | |
| "loss": 0.4036, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.6073597713469097, | |
| "grad_norm": 1.8136054277420044, | |
| "learning_rate": 9.683591511595107e-05, | |
| "loss": 0.2946, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.6145051804215791, | |
| "grad_norm": 1.8586084842681885, | |
| "learning_rate": 9.668870208461713e-05, | |
| "loss": 0.2259, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.6216505894962486, | |
| "grad_norm": 1.1640444993972778, | |
| "learning_rate": 9.653825895993908e-05, | |
| "loss": 0.4, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.6287959985709182, | |
| "grad_norm": 1.386013388633728, | |
| "learning_rate": 9.63845961501166e-05, | |
| "loss": 0.2804, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.6359414076455877, | |
| "grad_norm": 2.1413650512695312, | |
| "learning_rate": 9.622772428609887e-05, | |
| "loss": 0.3593, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.6430868167202572, | |
| "grad_norm": 1.5462217330932617, | |
| "learning_rate": 9.606765422084908e-05, | |
| "loss": 0.3058, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.6430868167202572, | |
| "eval_news_finetune_val_loss": 0.3292103707790375, | |
| "eval_news_finetune_val_runtime": 1003.4558, | |
| "eval_news_finetune_val_samples_per_second": 1.395, | |
| "eval_news_finetune_val_steps_per_second": 1.395, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.6502322257949268, | |
| "grad_norm": 1.0373942852020264, | |
| "learning_rate": 9.590439702859351e-05, | |
| "loss": 0.3318, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.6573776348695963, | |
| "grad_norm": 1.2724213600158691, | |
| "learning_rate": 9.573796400405544e-05, | |
| "loss": 0.3328, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.6645230439442658, | |
| "grad_norm": 0.8528966903686523, | |
| "learning_rate": 9.55683666616737e-05, | |
| "loss": 0.2673, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.6716684530189353, | |
| "grad_norm": 1.65499746799469, | |
| "learning_rate": 9.539561673480612e-05, | |
| "loss": 0.3538, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.6788138620936048, | |
| "grad_norm": 2.341379404067993, | |
| "learning_rate": 9.521972617491767e-05, | |
| "loss": 0.3228, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.6859592711682744, | |
| "grad_norm": 1.4938244819641113, | |
| "learning_rate": 9.504070715075372e-05, | |
| "loss": 0.3974, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.6931046802429439, | |
| "grad_norm": 1.0390361547470093, | |
| "learning_rate": 9.485857204749811e-05, | |
| "loss": 0.3236, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.7002500893176135, | |
| "grad_norm": 3.8845393657684326, | |
| "learning_rate": 9.467333346591632e-05, | |
| "loss": 0.3027, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.707395498392283, | |
| "grad_norm": 1.3295674324035645, | |
| "learning_rate": 9.448500422148364e-05, | |
| "loss": 0.3005, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.7145409074669525, | |
| "grad_norm": 1.0146369934082031, | |
| "learning_rate": 9.429359734349863e-05, | |
| "loss": 0.294, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.7145409074669525, | |
| "eval_news_finetune_val_loss": 0.3208242654800415, | |
| "eval_news_finetune_val_runtime": 1003.2491, | |
| "eval_news_finetune_val_samples_per_second": 1.395, | |
| "eval_news_finetune_val_steps_per_second": 1.395, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.721686316541622, | |
| "grad_norm": 1.5076738595962524, | |
| "learning_rate": 9.409912607418172e-05, | |
| "loss": 0.268, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.7288317256162915, | |
| "grad_norm": 3.3230276107788086, | |
| "learning_rate": 9.390160386775895e-05, | |
| "loss": 0.3038, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.735977134690961, | |
| "grad_norm": 1.699854850769043, | |
| "learning_rate": 9.370104438953125e-05, | |
| "loss": 0.2869, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.7431225437656306, | |
| "grad_norm": 0.904507577419281, | |
| "learning_rate": 9.349746151492902e-05, | |
| "loss": 0.289, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.7502679528403001, | |
| "grad_norm": 0.9463105201721191, | |
| "learning_rate": 9.329086932855215e-05, | |
| "loss": 0.3729, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.7574133619149697, | |
| "grad_norm": 1.4746607542037964, | |
| "learning_rate": 9.30812821231956e-05, | |
| "loss": 0.2282, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.7645587709896392, | |
| "grad_norm": 1.0270076990127563, | |
| "learning_rate": 9.286871439886058e-05, | |
| "loss": 0.3029, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.7717041800643086, | |
| "grad_norm": 2.0656538009643555, | |
| "learning_rate": 9.265318086175143e-05, | |
| "loss": 0.3268, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.7788495891389782, | |
| "grad_norm": 0.9798826575279236, | |
| "learning_rate": 9.243469642325805e-05, | |
| "loss": 0.2942, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.7859949982136477, | |
| "grad_norm": 1.1419672966003418, | |
| "learning_rate": 9.221327619892452e-05, | |
| "loss": 0.3266, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.7859949982136477, | |
| "eval_news_finetune_val_loss": 0.307956337928772, | |
| "eval_news_finetune_val_runtime": 1003.1873, | |
| "eval_news_finetune_val_samples_per_second": 1.396, | |
| "eval_news_finetune_val_steps_per_second": 1.396, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.7931404072883173, | |
| "grad_norm": 0.6810228228569031, | |
| "learning_rate": 9.198893550740306e-05, | |
| "loss": 0.3596, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.8002858163629868, | |
| "grad_norm": 1.6553049087524414, | |
| "learning_rate": 9.176168986939446e-05, | |
| "loss": 0.3106, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.8074312254376563, | |
| "grad_norm": 0.7749443650245667, | |
| "learning_rate": 9.153155500657422e-05, | |
| "loss": 0.3298, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.8145766345123259, | |
| "grad_norm": 0.8693751096725464, | |
| "learning_rate": 9.129854684050481e-05, | |
| "loss": 0.279, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.8217220435869954, | |
| "grad_norm": 1.1013332605361938, | |
| "learning_rate": 9.10626814915343e-05, | |
| "loss": 0.3195, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.8288674526616648, | |
| "grad_norm": 1.2278695106506348, | |
| "learning_rate": 9.082397527768092e-05, | |
| "loss": 0.3027, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.8360128617363344, | |
| "grad_norm": 2.173530101776123, | |
| "learning_rate": 9.058244471350428e-05, | |
| "loss": 0.2238, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.8431582708110039, | |
| "grad_norm": 1.125986933708191, | |
| "learning_rate": 9.033810650896274e-05, | |
| "loss": 0.2399, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.8503036798856735, | |
| "grad_norm": 0.6611151099205017, | |
| "learning_rate": 9.009097756825737e-05, | |
| "loss": 0.2736, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.857449088960343, | |
| "grad_norm": 1.9068485498428345, | |
| "learning_rate": 8.98410749886625e-05, | |
| "loss": 0.2949, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.857449088960343, | |
| "eval_news_finetune_val_loss": 0.31006094813346863, | |
| "eval_news_finetune_val_runtime": 1002.7866, | |
| "eval_news_finetune_val_samples_per_second": 1.396, | |
| "eval_news_finetune_val_steps_per_second": 1.396, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.8645944980350125, | |
| "grad_norm": 1.192031979560852, | |
| "learning_rate": 8.958841605934278e-05, | |
| "loss": 0.3657, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.8717399071096821, | |
| "grad_norm": 1.2596725225448608, | |
| "learning_rate": 8.933301826015715e-05, | |
| "loss": 0.3068, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.8788853161843515, | |
| "grad_norm": 1.4713683128356934, | |
| "learning_rate": 8.907489926044945e-05, | |
| "loss": 0.3122, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.886030725259021, | |
| "grad_norm": 1.3583886623382568, | |
| "learning_rate": 8.881407691782608e-05, | |
| "loss": 0.2989, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.8931761343336906, | |
| "grad_norm": 0.9863426089286804, | |
| "learning_rate": 8.855056927692037e-05, | |
| "loss": 0.2549, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.9003215434083601, | |
| "grad_norm": 1.0579396486282349, | |
| "learning_rate": 8.828439456814442e-05, | |
| "loss": 0.2809, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.9074669524830297, | |
| "grad_norm": 2.847482681274414, | |
| "learning_rate": 8.801557120642766e-05, | |
| "loss": 0.2933, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.9146123615576992, | |
| "grad_norm": 0.8942415118217468, | |
| "learning_rate": 8.774411778994295e-05, | |
| "loss": 0.2866, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.9217577706323687, | |
| "grad_norm": 1.297845721244812, | |
| "learning_rate": 8.747005309881984e-05, | |
| "loss": 0.2939, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.9289031797070382, | |
| "grad_norm": 1.2745181322097778, | |
| "learning_rate": 8.719339609384531e-05, | |
| "loss": 0.3018, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.9289031797070382, | |
| "eval_news_finetune_val_loss": 0.29822030663490295, | |
| "eval_news_finetune_val_runtime": 1002.5672, | |
| "eval_news_finetune_val_samples_per_second": 1.396, | |
| "eval_news_finetune_val_steps_per_second": 1.396, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.9360485887817077, | |
| "grad_norm": 1.3898978233337402, | |
| "learning_rate": 8.691416591515198e-05, | |
| "loss": 0.295, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.9431939978563773, | |
| "grad_norm": 1.1516591310501099, | |
| "learning_rate": 8.663238188089398e-05, | |
| "loss": 0.209, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.9503394069310468, | |
| "grad_norm": 0.9356768131256104, | |
| "learning_rate": 8.634806348591036e-05, | |
| "loss": 0.2904, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.9574848160057163, | |
| "grad_norm": 1.884950876235962, | |
| "learning_rate": 8.606123040037643e-05, | |
| "loss": 0.2607, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.9646302250803859, | |
| "grad_norm": 1.2719082832336426, | |
| "learning_rate": 8.577190246844291e-05, | |
| "loss": 0.3279, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.9717756341550554, | |
| "grad_norm": 0.935297429561615, | |
| "learning_rate": 8.548009970686302e-05, | |
| "loss": 0.3011, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.978921043229725, | |
| "grad_norm": 1.6732884645462036, | |
| "learning_rate": 8.51858423036076e-05, | |
| "loss": 0.2379, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.9860664523043944, | |
| "grad_norm": 0.6651692390441895, | |
| "learning_rate": 8.488915061646856e-05, | |
| "loss": 0.2599, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.9932118613790639, | |
| "grad_norm": 1.121752381324768, | |
| "learning_rate": 8.459004517165032e-05, | |
| "loss": 0.2265, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.5099928379058838, | |
| "learning_rate": 8.428854666234978e-05, | |
| "loss": 0.3301, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_news_finetune_val_loss": 0.28762951493263245, | |
| "eval_news_finetune_val_runtime": 1002.7793, | |
| "eval_news_finetune_val_samples_per_second": 1.396, | |
| "eval_news_finetune_val_steps_per_second": 1.396, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.0071454090746694, | |
| "grad_norm": 0.9986103177070618, | |
| "learning_rate": 8.398467594732478e-05, | |
| "loss": 0.2021, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 1.014290818149339, | |
| "grad_norm": 1.2675282955169678, | |
| "learning_rate": 8.367845404945084e-05, | |
| "loss": 0.2228, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 1.0214362272240085, | |
| "grad_norm": 0.8156709671020508, | |
| "learning_rate": 8.336990215426688e-05, | |
| "loss": 0.1947, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 1.0285816362986782, | |
| "grad_norm": 0.5374387502670288, | |
| "learning_rate": 8.305904160850941e-05, | |
| "loss": 0.2344, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 1.0357270453733476, | |
| "grad_norm": 0.6672261357307434, | |
| "learning_rate": 8.274589391863583e-05, | |
| "loss": 0.1919, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 1.0428724544480172, | |
| "grad_norm": 0.9803467988967896, | |
| "learning_rate": 8.243048074933634e-05, | |
| "loss": 0.2218, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 1.0500178635226867, | |
| "grad_norm": 1.482840657234192, | |
| "learning_rate": 8.21128239220353e-05, | |
| "loss": 0.2556, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 1.057163272597356, | |
| "grad_norm": 1.0589625835418701, | |
| "learning_rate": 8.179294541338135e-05, | |
| "loss": 0.2052, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 1.0643086816720257, | |
| "grad_norm": 0.8332052230834961, | |
| "learning_rate": 8.147086735372716e-05, | |
| "loss": 0.2386, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 1.0714540907466952, | |
| "grad_norm": 0.6018723845481873, | |
| "learning_rate": 8.114661202559828e-05, | |
| "loss": 0.1426, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.0714540907466952, | |
| "eval_news_finetune_val_loss": 0.30121028423309326, | |
| "eval_news_finetune_val_runtime": 1002.7457, | |
| "eval_news_finetune_val_samples_per_second": 1.396, | |
| "eval_news_finetune_val_steps_per_second": 1.396, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.0785994998213648, | |
| "grad_norm": 1.7663507461547852, | |
| "learning_rate": 8.082020186215156e-05, | |
| "loss": 0.2407, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 1.0857449088960343, | |
| "grad_norm": 1.2081632614135742, | |
| "learning_rate": 8.049165944562316e-05, | |
| "loss": 0.2483, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 1.092890317970704, | |
| "grad_norm": 0.5045826435089111, | |
| "learning_rate": 8.016100750576621e-05, | |
| "loss": 0.2013, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 1.1000357270453733, | |
| "grad_norm": 1.4456278085708618, | |
| "learning_rate": 7.98282689182783e-05, | |
| "loss": 0.2034, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 1.107181136120043, | |
| "grad_norm": 1.1558668613433838, | |
| "learning_rate": 7.949346670321891e-05, | |
| "loss": 0.2386, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 1.1143265451947124, | |
| "grad_norm": 1.4196126461029053, | |
| "learning_rate": 7.915662402341664e-05, | |
| "loss": 0.2299, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 1.1214719542693818, | |
| "grad_norm": 0.9341222047805786, | |
| "learning_rate": 7.88177641828669e-05, | |
| "loss": 0.2105, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 1.1286173633440515, | |
| "grad_norm": 1.066001296043396, | |
| "learning_rate": 7.847691062511957e-05, | |
| "loss": 0.1925, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 1.135762772418721, | |
| "grad_norm": 0.7840182781219482, | |
| "learning_rate": 7.813408693165704e-05, | |
| "loss": 0.2425, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 1.1429081814933906, | |
| "grad_norm": 0.983668327331543, | |
| "learning_rate": 7.778931682026293e-05, | |
| "loss": 0.2014, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.1429081814933906, | |
| "eval_news_finetune_val_loss": 0.29564452171325684, | |
| "eval_news_finetune_val_runtime": 1003.001, | |
| "eval_news_finetune_val_samples_per_second": 1.396, | |
| "eval_news_finetune_val_steps_per_second": 1.396, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.15005359056806, | |
| "grad_norm": 1.63984215259552, | |
| "learning_rate": 7.744262414338099e-05, | |
| "loss": 0.2863, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 1.1571989996427297, | |
| "grad_norm": 0.9211621284484863, | |
| "learning_rate": 7.709403288646507e-05, | |
| "loss": 0.2175, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 1.164344408717399, | |
| "grad_norm": 1.3369996547698975, | |
| "learning_rate": 7.67435671663196e-05, | |
| "loss": 0.1893, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 1.1714898177920685, | |
| "grad_norm": 0.7532891631126404, | |
| "learning_rate": 7.63912512294312e-05, | |
| "loss": 0.2483, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 1.1786352268667382, | |
| "grad_norm": 1.0959442853927612, | |
| "learning_rate": 7.603710945029119e-05, | |
| "loss": 0.1888, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 1.1857806359414076, | |
| "grad_norm": 0.9019472599029541, | |
| "learning_rate": 7.568116632970922e-05, | |
| "loss": 0.2144, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 1.1929260450160772, | |
| "grad_norm": 1.1219818592071533, | |
| "learning_rate": 7.532344649311829e-05, | |
| "loss": 0.191, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 1.2000714540907467, | |
| "grad_norm": 1.0829100608825684, | |
| "learning_rate": 7.496397468887106e-05, | |
| "loss": 0.2762, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 1.2072168631654163, | |
| "grad_norm": 0.7855832576751709, | |
| "learning_rate": 7.460277578652759e-05, | |
| "loss": 0.157, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 1.2143622722400857, | |
| "grad_norm": 2.407999038696289, | |
| "learning_rate": 7.423987477513488e-05, | |
| "loss": 0.2627, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 1.2143622722400857, | |
| "eval_news_finetune_val_loss": 0.28248873353004456, | |
| "eval_news_finetune_val_runtime": 1003.1081, | |
| "eval_news_finetune_val_samples_per_second": 1.396, | |
| "eval_news_finetune_val_steps_per_second": 1.396, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 1.2215076813147552, | |
| "grad_norm": 1.5500895977020264, | |
| "learning_rate": 7.387529676149799e-05, | |
| "loss": 0.1477, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 1.2286530903894248, | |
| "grad_norm": 1.5599130392074585, | |
| "learning_rate": 7.350906696844307e-05, | |
| "loss": 0.1942, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 1.2357984994640943, | |
| "grad_norm": 1.6327091455459595, | |
| "learning_rate": 7.314121073307229e-05, | |
| "loss": 0.2, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 1.242943908538764, | |
| "grad_norm": 0.6044666767120361, | |
| "learning_rate": 7.277175350501111e-05, | |
| "loss": 0.185, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 1.2500893176134333, | |
| "grad_norm": 1.317089319229126, | |
| "learning_rate": 7.240072084464729e-05, | |
| "loss": 0.196, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 1.257234726688103, | |
| "grad_norm": 1.089105486869812, | |
| "learning_rate": 7.202813842136283e-05, | |
| "loss": 0.1322, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 1.2643801357627724, | |
| "grad_norm": 1.4972888231277466, | |
| "learning_rate": 7.165403201175787e-05, | |
| "loss": 0.2176, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 1.2715255448374418, | |
| "grad_norm": 1.4998830556869507, | |
| "learning_rate": 7.127842749786747e-05, | |
| "loss": 0.218, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 1.2786709539121115, | |
| "grad_norm": 0.9759517908096313, | |
| "learning_rate": 7.090135086537095e-05, | |
| "loss": 0.1653, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 1.285816362986781, | |
| "grad_norm": 0.9713583588600159, | |
| "learning_rate": 7.052282820179412e-05, | |
| "loss": 0.175, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.285816362986781, | |
| "eval_news_finetune_val_loss": 0.2936909794807434, | |
| "eval_news_finetune_val_runtime": 1003.12, | |
| "eval_news_finetune_val_samples_per_second": 1.396, | |
| "eval_news_finetune_val_steps_per_second": 1.396, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.2929617720614506, | |
| "grad_norm": 0.6328814625740051, | |
| "learning_rate": 7.014288569470446e-05, | |
| "loss": 0.1727, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 1.30010718113612, | |
| "grad_norm": 1.622104525566101, | |
| "learning_rate": 6.976154962989934e-05, | |
| "loss": 0.2363, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 1.3072525902107897, | |
| "grad_norm": 1.8254674673080444, | |
| "learning_rate": 6.937884638958757e-05, | |
| "loss": 0.1897, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 1.314397999285459, | |
| "grad_norm": 0.8813793063163757, | |
| "learning_rate": 6.899480245056396e-05, | |
| "loss": 0.2029, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 1.3215434083601285, | |
| "grad_norm": 0.7675999999046326, | |
| "learning_rate": 6.860944438237788e-05, | |
| "loss": 0.2025, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 1.3286888174347982, | |
| "grad_norm": 1.1973013877868652, | |
| "learning_rate": 6.82227988454948e-05, | |
| "loss": 0.2317, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 1.3358342265094676, | |
| "grad_norm": 0.7864009737968445, | |
| "learning_rate": 6.783489258945195e-05, | |
| "loss": 0.2318, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 1.3429796355841372, | |
| "grad_norm": 1.0866330862045288, | |
| "learning_rate": 6.74457524510077e-05, | |
| "loss": 0.1871, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 1.3501250446588067, | |
| "grad_norm": 0.8745126724243164, | |
| "learning_rate": 6.705540535228485e-05, | |
| "loss": 0.211, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 1.3572704537334763, | |
| "grad_norm": 1.3401581048965454, | |
| "learning_rate": 6.66638782989081e-05, | |
| "loss": 0.2307, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 1.3572704537334763, | |
| "eval_news_finetune_val_loss": 0.2787444591522217, | |
| "eval_news_finetune_val_runtime": 1002.9344, | |
| "eval_news_finetune_val_samples_per_second": 1.396, | |
| "eval_news_finetune_val_steps_per_second": 1.396, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 1.3644158628081458, | |
| "grad_norm": 0.6149284839630127, | |
| "learning_rate": 6.627119837813564e-05, | |
| "loss": 0.2128, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 1.3715612718828152, | |
| "grad_norm": 1.7847625017166138, | |
| "learning_rate": 6.587739275698525e-05, | |
| "loss": 0.1551, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 1.3787066809574848, | |
| "grad_norm": 1.1973716020584106, | |
| "learning_rate": 6.54824886803547e-05, | |
| "loss": 0.2335, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 1.3858520900321543, | |
| "grad_norm": 1.5757859945297241, | |
| "learning_rate": 6.508651346913687e-05, | |
| "loss": 0.1504, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 1.392997499106824, | |
| "grad_norm": 1.7269341945648193, | |
| "learning_rate": 6.468949451832968e-05, | |
| "loss": 0.2679, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 1.4001429081814933, | |
| "grad_norm": 1.6860129833221436, | |
| "learning_rate": 6.429145929514063e-05, | |
| "loss": 0.1942, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 1.407288317256163, | |
| "grad_norm": 1.1732631921768188, | |
| "learning_rate": 6.389243533708671e-05, | |
| "loss": 0.2025, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 1.4144337263308324, | |
| "grad_norm": 0.9073033332824707, | |
| "learning_rate": 6.349245025008912e-05, | |
| "loss": 0.1836, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 1.4215791354055018, | |
| "grad_norm": 1.133843183517456, | |
| "learning_rate": 6.309153170656342e-05, | |
| "loss": 0.1526, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 1.4287245444801715, | |
| "grad_norm": 2.656296968460083, | |
| "learning_rate": 6.268970744350515e-05, | |
| "loss": 0.1939, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.4287245444801715, | |
| "eval_news_finetune_val_loss": 0.27414408326148987, | |
| "eval_news_finetune_val_runtime": 1003.0949, | |
| "eval_news_finetune_val_samples_per_second": 1.396, | |
| "eval_news_finetune_val_steps_per_second": 1.396, | |
| "step": 2000 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 4197, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 5.538125336973312e+16, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |