{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.6020469596628537,
  "eval_steps": 500,
  "global_step": 5000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0012040939193257074,
      "grad_norm": 2.0694425106048584,
      "learning_rate": 1.2033694344163658e-08,
      "loss": 0.6897,
      "step": 10
    },
    {
      "epoch": 0.002408187838651415,
      "grad_norm": 2.151496171951294,
      "learning_rate": 2.4067388688327316e-08,
      "loss": 0.6787,
      "step": 20
    },
    {
      "epoch": 0.003612281757977122,
      "grad_norm": 2.640268564224243,
      "learning_rate": 3.610108303249097e-08,
      "loss": 0.6639,
      "step": 30
    },
    {
      "epoch": 0.00481637567730283,
      "grad_norm": 2.6572210788726807,
      "learning_rate": 4.813477737665463e-08,
      "loss": 0.7152,
      "step": 40
    },
    {
      "epoch": 0.006020469596628537,
      "grad_norm": 1.7933714389801025,
      "learning_rate": 6.016847172081829e-08,
      "loss": 0.6503,
      "step": 50
    },
    {
      "epoch": 0.007224563515954244,
      "grad_norm": 2.3688879013061523,
      "learning_rate": 7.220216606498194e-08,
      "loss": 0.6827,
      "step": 60
    },
    {
      "epoch": 0.008428657435279952,
      "grad_norm": 2.220139265060425,
      "learning_rate": 8.42358604091456e-08,
      "loss": 0.6443,
      "step": 70
    },
    {
      "epoch": 0.00963275135460566,
      "grad_norm": 2.4725093841552734,
      "learning_rate": 9.626955475330927e-08,
      "loss": 0.6681,
      "step": 80
    },
    {
      "epoch": 0.010836845273931367,
      "grad_norm": 1.4149224758148193,
      "learning_rate": 1.0830324909747292e-07,
      "loss": 0.5592,
      "step": 90
    },
    {
      "epoch": 0.012040939193257074,
      "grad_norm": 0.9355699419975281,
      "learning_rate": 1.2033694344163658e-07,
      "loss": 0.5802,
      "step": 100
    },
    {
      "epoch": 0.013245033112582781,
      "grad_norm": 1.0211461782455444,
      "learning_rate": 1.3237063778580024e-07,
      "loss": 0.5589,
      "step": 110
    },
    {
      "epoch": 0.014449127031908489,
      "grad_norm": 1.0006492137908936,
      "learning_rate": 1.4440433212996388e-07,
      "loss": 0.5421,
      "step": 120
    },
    {
      "epoch": 0.015653220951234198,
      "grad_norm": 0.8444674015045166,
      "learning_rate": 1.5643802647412754e-07,
      "loss": 0.5079,
      "step": 130
    },
    {
      "epoch": 0.016857314870559904,
      "grad_norm": 0.7920398712158203,
      "learning_rate": 1.684717208182912e-07,
      "loss": 0.4898,
      "step": 140
    },
    {
      "epoch": 0.018061408789885613,
      "grad_norm": 0.6817948818206787,
      "learning_rate": 1.8050541516245487e-07,
      "loss": 0.4645,
      "step": 150
    },
    {
      "epoch": 0.01926550270921132,
      "grad_norm": 0.9353106021881104,
      "learning_rate": 1.9253910950661853e-07,
      "loss": 0.485,
      "step": 160
    },
    {
      "epoch": 0.020469596628537028,
      "grad_norm": 0.6695616841316223,
      "learning_rate": 2.045728038507822e-07,
      "loss": 0.4647,
      "step": 170
    },
    {
      "epoch": 0.021673690547862733,
      "grad_norm": 0.6993837952613831,
      "learning_rate": 2.1660649819494583e-07,
      "loss": 0.4378,
      "step": 180
    },
    {
      "epoch": 0.022877784467188442,
      "grad_norm": 0.7333642244338989,
      "learning_rate": 2.286401925391095e-07,
      "loss": 0.4288,
      "step": 190
    },
    {
      "epoch": 0.024081878386514148,
      "grad_norm": 0.707914412021637,
      "learning_rate": 2.4067388688327316e-07,
      "loss": 0.4601,
      "step": 200
    },
    {
      "epoch": 0.025285972305839857,
      "grad_norm": 0.7626605033874512,
      "learning_rate": 2.527075812274368e-07,
      "loss": 0.4454,
      "step": 210
    },
    {
      "epoch": 0.026490066225165563,
      "grad_norm": 1.2267224788665771,
      "learning_rate": 2.647412755716005e-07,
      "loss": 0.4398,
      "step": 220
    },
    {
      "epoch": 0.027694160144491272,
      "grad_norm": 0.7376552224159241,
      "learning_rate": 2.767749699157641e-07,
      "loss": 0.4275,
      "step": 230
    },
    {
      "epoch": 0.028898254063816978,
      "grad_norm": 0.7109339237213135,
      "learning_rate": 2.8880866425992776e-07,
      "loss": 0.3996,
      "step": 240
    },
    {
      "epoch": 0.030102347983142687,
      "grad_norm": 0.6406791806221008,
      "learning_rate": 3.008423586040915e-07,
      "loss": 0.4337,
      "step": 250
    },
    {
      "epoch": 0.031306441902468396,
      "grad_norm": 0.6780328154563904,
      "learning_rate": 3.128760529482551e-07,
      "loss": 0.4296,
      "step": 260
    },
    {
      "epoch": 0.0325105358217941,
      "grad_norm": 0.5574681162834167,
      "learning_rate": 3.2490974729241875e-07,
      "loss": 0.4123,
      "step": 270
    },
    {
      "epoch": 0.03371462974111981,
      "grad_norm": 0.6190093755722046,
      "learning_rate": 3.369434416365824e-07,
      "loss": 0.3959,
      "step": 280
    },
    {
      "epoch": 0.034918723660445516,
      "grad_norm": 0.6488677859306335,
      "learning_rate": 3.4897713598074607e-07,
      "loss": 0.3883,
      "step": 290
    },
    {
      "epoch": 0.036122817579771226,
      "grad_norm": 0.6014848351478577,
      "learning_rate": 3.6101083032490974e-07,
      "loss": 0.4222,
      "step": 300
    },
    {
      "epoch": 0.03732691149909693,
      "grad_norm": 0.5347362160682678,
      "learning_rate": 3.730445246690734e-07,
      "loss": 0.3929,
      "step": 310
    },
    {
      "epoch": 0.03853100541842264,
      "grad_norm": 1.4445090293884277,
      "learning_rate": 3.8507821901323706e-07,
      "loss": 0.3798,
      "step": 320
    },
    {
      "epoch": 0.039735099337748346,
      "grad_norm": 0.6319730877876282,
      "learning_rate": 3.9711191335740067e-07,
      "loss": 0.386,
      "step": 330
    },
    {
      "epoch": 0.040939193257074055,
      "grad_norm": 0.9257851243019104,
      "learning_rate": 4.091456077015644e-07,
      "loss": 0.393,
      "step": 340
    },
    {
      "epoch": 0.04214328717639976,
      "grad_norm": 0.5936801433563232,
      "learning_rate": 4.2117930204572805e-07,
      "loss": 0.3912,
      "step": 350
    },
    {
      "epoch": 0.04334738109572547,
      "grad_norm": 0.686888575553894,
      "learning_rate": 4.3321299638989166e-07,
      "loss": 0.4015,
      "step": 360
    },
    {
      "epoch": 0.044551475015051176,
      "grad_norm": 0.5986278653144836,
      "learning_rate": 4.452466907340554e-07,
      "loss": 0.3622,
      "step": 370
    },
    {
      "epoch": 0.045755568934376885,
      "grad_norm": 0.5603286623954773,
      "learning_rate": 4.57280385078219e-07,
      "loss": 0.3774,
      "step": 380
    },
    {
      "epoch": 0.04695966285370259,
      "grad_norm": 1.2507776021957397,
      "learning_rate": 4.6931407942238265e-07,
      "loss": 0.3681,
      "step": 390
    },
    {
      "epoch": 0.048163756773028296,
      "grad_norm": 0.5886845588684082,
      "learning_rate": 4.813477737665463e-07,
      "loss": 0.371,
      "step": 400
    },
    {
      "epoch": 0.049367850692354005,
      "grad_norm": 0.5690301656723022,
      "learning_rate": 4.9338146811071e-07,
      "loss": 0.3454,
      "step": 410
    },
    {
      "epoch": 0.050571944611679714,
      "grad_norm": 0.6363804340362549,
      "learning_rate": 5.054151624548736e-07,
      "loss": 0.3477,
      "step": 420
    },
    {
      "epoch": 0.05177603853100542,
      "grad_norm": 0.49289166927337646,
      "learning_rate": 5.174488567990373e-07,
      "loss": 0.352,
      "step": 430
    },
    {
      "epoch": 0.052980132450331126,
      "grad_norm": 0.5901724696159363,
      "learning_rate": 5.29482551143201e-07,
      "loss": 0.3514,
      "step": 440
    },
    {
      "epoch": 0.054184226369656835,
      "grad_norm": 0.6019484996795654,
      "learning_rate": 5.415162454873646e-07,
      "loss": 0.3713,
      "step": 450
    },
    {
      "epoch": 0.055388320288982544,
      "grad_norm": 0.5057175755500793,
      "learning_rate": 5.535499398315282e-07,
      "loss": 0.3346,
      "step": 460
    },
    {
      "epoch": 0.056592414208308246,
      "grad_norm": 0.4834252893924713,
      "learning_rate": 5.655836341756919e-07,
      "loss": 0.3638,
      "step": 470
    },
    {
      "epoch": 0.057796508127633955,
      "grad_norm": 0.6098750233650208,
      "learning_rate": 5.776173285198555e-07,
      "loss": 0.3622,
      "step": 480
    },
    {
      "epoch": 0.059000602046959665,
      "grad_norm": 0.6201721429824829,
      "learning_rate": 5.896510228640193e-07,
      "loss": 0.3329,
      "step": 490
    },
    {
      "epoch": 0.060204695966285374,
      "grad_norm": 0.7006021738052368,
      "learning_rate": 6.01684717208183e-07,
      "loss": 0.3487,
      "step": 500
    },
    {
      "epoch": 0.061408789885611076,
      "grad_norm": 0.708990216255188,
      "learning_rate": 6.137184115523465e-07,
      "loss": 0.3448,
      "step": 510
    },
    {
      "epoch": 0.06261288380493679,
      "grad_norm": 0.7767229676246643,
      "learning_rate": 6.257521058965102e-07,
      "loss": 0.3751,
      "step": 520
    },
    {
      "epoch": 0.0638169777242625,
      "grad_norm": 0.6051218509674072,
      "learning_rate": 6.377858002406738e-07,
      "loss": 0.3502,
      "step": 530
    },
    {
      "epoch": 0.0650210716435882,
      "grad_norm": 0.7111226916313171,
      "learning_rate": 6.498194945848375e-07,
      "loss": 0.3625,
      "step": 540
    },
    {
      "epoch": 0.06622516556291391,
      "grad_norm": 0.7441733479499817,
      "learning_rate": 6.618531889290013e-07,
      "loss": 0.3269,
      "step": 550
    },
    {
      "epoch": 0.06742925948223961,
      "grad_norm": 0.6909326910972595,
      "learning_rate": 6.738868832731648e-07,
      "loss": 0.3302,
      "step": 560
    },
    {
      "epoch": 0.06863335340156532,
      "grad_norm": 0.7504749298095703,
      "learning_rate": 6.859205776173285e-07,
      "loss": 0.3425,
      "step": 570
    },
    {
      "epoch": 0.06983744732089103,
      "grad_norm": 0.5878099799156189,
      "learning_rate": 6.979542719614921e-07,
      "loss": 0.3504,
      "step": 580
    },
    {
      "epoch": 0.07104154124021674,
      "grad_norm": 0.5515761971473694,
      "learning_rate": 7.099879663056558e-07,
      "loss": 0.3409,
      "step": 590
    },
    {
      "epoch": 0.07224563515954245,
      "grad_norm": 0.57797771692276,
      "learning_rate": 7.220216606498195e-07,
      "loss": 0.3416,
      "step": 600
    },
    {
      "epoch": 0.07344972907886815,
      "grad_norm": 0.4524708390235901,
      "learning_rate": 7.34055354993983e-07,
      "loss": 0.3581,
      "step": 610
    },
    {
      "epoch": 0.07465382299819386,
      "grad_norm": 0.718927800655365,
      "learning_rate": 7.460890493381468e-07,
      "loss": 0.3609,
      "step": 620
    },
    {
      "epoch": 0.07585791691751957,
      "grad_norm": 0.5666077733039856,
      "learning_rate": 7.581227436823105e-07,
      "loss": 0.335,
      "step": 630
    },
    {
      "epoch": 0.07706201083684527,
      "grad_norm": 0.5896601676940918,
      "learning_rate": 7.701564380264741e-07,
      "loss": 0.3274,
      "step": 640
    },
    {
      "epoch": 0.07826610475617098,
      "grad_norm": 0.6044319868087769,
      "learning_rate": 7.821901323706378e-07,
      "loss": 0.3407,
      "step": 650
    },
    {
      "epoch": 0.07947019867549669,
      "grad_norm": 0.6831541061401367,
      "learning_rate": 7.942238267148013e-07,
      "loss": 0.3333,
      "step": 660
    },
    {
      "epoch": 0.0806742925948224,
      "grad_norm": 0.7124572396278381,
      "learning_rate": 8.06257521058965e-07,
      "loss": 0.3326,
      "step": 670
    },
    {
      "epoch": 0.08187838651414811,
      "grad_norm": 0.732711136341095,
      "learning_rate": 8.182912154031288e-07,
      "loss": 0.3487,
      "step": 680
    },
    {
      "epoch": 0.08308248043347381,
      "grad_norm": 0.7555579543113708,
      "learning_rate": 8.303249097472924e-07,
      "loss": 0.3218,
      "step": 690
    },
    {
      "epoch": 0.08428657435279951,
      "grad_norm": 0.7618419528007507,
      "learning_rate": 8.423586040914561e-07,
      "loss": 0.3231,
      "step": 700
    },
    {
      "epoch": 0.08549066827212523,
      "grad_norm": 0.7383216023445129,
      "learning_rate": 8.543922984356197e-07,
      "loss": 0.3218,
      "step": 710
    },
    {
      "epoch": 0.08669476219145093,
      "grad_norm": 0.5902182459831238,
      "learning_rate": 8.664259927797833e-07,
      "loss": 0.3367,
      "step": 720
    },
    {
      "epoch": 0.08789885611077664,
      "grad_norm": 0.6107906103134155,
      "learning_rate": 8.78459687123947e-07,
      "loss": 0.3331,
      "step": 730
    },
    {
      "epoch": 0.08910295003010235,
      "grad_norm": 0.7179387211799622,
      "learning_rate": 8.904933814681108e-07,
      "loss": 0.3347,
      "step": 740
    },
    {
      "epoch": 0.09030704394942805,
      "grad_norm": 0.8263080716133118,
      "learning_rate": 9.025270758122743e-07,
      "loss": 0.3247,
      "step": 750
    },
    {
      "epoch": 0.09151113786875377,
      "grad_norm": 0.8549688458442688,
      "learning_rate": 9.14560770156438e-07,
      "loss": 0.3239,
      "step": 760
    },
    {
      "epoch": 0.09271523178807947,
      "grad_norm": 0.6674267053604126,
      "learning_rate": 9.265944645006016e-07,
      "loss": 0.333,
      "step": 770
    },
    {
      "epoch": 0.09391932570740517,
      "grad_norm": 0.5892189741134644,
      "learning_rate": 9.386281588447653e-07,
      "loss": 0.322,
      "step": 780
    },
    {
      "epoch": 0.09512341962673089,
      "grad_norm": 0.7087513208389282,
      "learning_rate": 9.50661853188929e-07,
      "loss": 0.327,
      "step": 790
    },
    {
      "epoch": 0.09632751354605659,
      "grad_norm": 0.6016402840614319,
      "learning_rate": 9.626955475330926e-07,
      "loss": 0.3255,
      "step": 800
    },
    {
      "epoch": 0.0975316074653823,
      "grad_norm": 0.5783524513244629,
      "learning_rate": 9.747292418772562e-07,
      "loss": 0.3128,
      "step": 810
    },
    {
      "epoch": 0.09873570138470801,
      "grad_norm": 0.6049711108207703,
      "learning_rate": 9.8676293622142e-07,
      "loss": 0.3257,
      "step": 820
    },
    {
      "epoch": 0.09993979530403371,
      "grad_norm": 0.6259274482727051,
      "learning_rate": 9.987966305655835e-07,
      "loss": 0.3318,
      "step": 830
    },
    {
      "epoch": 0.10114388922335943,
      "grad_norm": 0.5331777930259705,
      "learning_rate": 9.999964221834556e-07,
      "loss": 0.3133,
      "step": 840
    },
    {
      "epoch": 0.10234798314268513,
      "grad_norm": 0.5190764665603638,
      "learning_rate": 9.999840544882987e-07,
      "loss": 0.3349,
      "step": 850
    },
    {
      "epoch": 0.10355207706201083,
      "grad_norm": 0.5867928862571716,
      "learning_rate": 9.99962852962418e-07,
      "loss": 0.3252,
      "step": 860
    },
    {
      "epoch": 0.10475617098133655,
      "grad_norm": 0.7667666673660278,
      "learning_rate": 9.999328179804064e-07,
      "loss": 0.3269,
      "step": 870
    },
    {
      "epoch": 0.10596026490066225,
      "grad_norm": 0.5684708952903748,
      "learning_rate": 9.998939500729291e-07,
      "loss": 0.3204,
      "step": 880
    },
    {
      "epoch": 0.10716435881998795,
      "grad_norm": 0.5369793772697449,
      "learning_rate": 9.99846249926713e-07,
      "loss": 0.2997,
      "step": 890
    },
    {
      "epoch": 0.10836845273931367,
      "grad_norm": 0.5773791074752808,
      "learning_rate": 9.997897183845347e-07,
      "loss": 0.3147,
      "step": 900
    },
    {
      "epoch": 0.10957254665863937,
      "grad_norm": 0.571826159954071,
      "learning_rate": 9.997243564452064e-07,
      "loss": 0.32,
      "step": 910
    },
    {
      "epoch": 0.11077664057796509,
      "grad_norm": 0.420244961977005,
      "learning_rate": 9.996501652635578e-07,
      "loss": 0.3141,
      "step": 920
    },
    {
      "epoch": 0.11198073449729079,
      "grad_norm": 0.5253920555114746,
      "learning_rate": 9.99567146150415e-07,
      "loss": 0.3201,
      "step": 930
    },
    {
      "epoch": 0.11318482841661649,
      "grad_norm": 0.49279969930648804,
      "learning_rate": 9.994753005725785e-07,
      "loss": 0.3076,
      "step": 940
    },
    {
      "epoch": 0.11438892233594221,
      "grad_norm": 0.6114805936813354,
      "learning_rate": 9.993746301527965e-07,
      "loss": 0.3209,
      "step": 950
    },
    {
      "epoch": 0.11559301625526791,
      "grad_norm": 1.6514418125152588,
      "learning_rate": 9.99265136669737e-07,
      "loss": 0.319,
      "step": 960
    },
    {
      "epoch": 0.11679711017459361,
      "grad_norm": 0.6415925621986389,
      "learning_rate": 9.99146822057955e-07,
      "loss": 0.3268,
      "step": 970
    },
    {
      "epoch": 0.11800120409391933,
      "grad_norm": 0.5680079460144043,
      "learning_rate": 9.990196884078599e-07,
      "loss": 0.3139,
      "step": 980
    },
    {
      "epoch": 0.11920529801324503,
      "grad_norm": 0.715497612953186,
      "learning_rate": 9.988837379656778e-07,
      "loss": 0.3143,
      "step": 990
    },
    {
      "epoch": 0.12040939193257075,
      "grad_norm": 0.6379466652870178,
      "learning_rate": 9.987389731334112e-07,
      "loss": 0.3037,
      "step": 1000
    },
    {
      "epoch": 0.12161348585189645,
      "grad_norm": 0.5227240920066833,
      "learning_rate": 9.985853964687985e-07,
      "loss": 0.3202,
      "step": 1010
    },
    {
      "epoch": 0.12281757977122215,
      "grad_norm": 0.5148226022720337,
      "learning_rate": 9.984230106852658e-07,
      "loss": 0.3089,
      "step": 1020
    },
    {
      "epoch": 0.12402167369054787,
      "grad_norm": 0.8337252140045166,
      "learning_rate": 9.982518186518824e-07,
      "loss": 0.3093,
      "step": 1030
    },
    {
      "epoch": 0.12522576760987358,
      "grad_norm": 0.5874176621437073,
      "learning_rate": 9.980718233933072e-07,
      "loss": 0.3257,
      "step": 1040
    },
    {
      "epoch": 0.12642986152919927,
      "grad_norm": 0.6203235983848572,
      "learning_rate": 9.978830280897373e-07,
      "loss": 0.3094,
      "step": 1050
    },
    {
      "epoch": 0.127633955448525,
      "grad_norm": 0.7386701107025146,
      "learning_rate": 9.976854360768501e-07,
      "loss": 0.3283,
      "step": 1060
    },
    {
      "epoch": 0.1288380493678507,
      "grad_norm": 0.7480394244194031,
      "learning_rate": 9.97479050845746e-07,
      "loss": 0.322,
      "step": 1070
    },
    {
      "epoch": 0.1300421432871764,
      "grad_norm": 0.6779530048370361,
      "learning_rate": 9.97263876042886e-07,
      "loss": 0.3263,
      "step": 1080
    },
    {
      "epoch": 0.1312462372065021,
      "grad_norm": 1.0457607507705688,
      "learning_rate": 9.970399154700262e-07,
      "loss": 0.324,
      "step": 1090
    },
    {
      "epoch": 0.13245033112582782,
      "grad_norm": 0.4574492871761322,
      "learning_rate": 9.96807173084153e-07,
      "loss": 0.3033,
      "step": 1100
    },
    {
      "epoch": 0.1336544250451535,
      "grad_norm": 0.4800940454006195,
      "learning_rate": 9.965656529974108e-07,
      "loss": 0.3076,
      "step": 1110
    },
    {
      "epoch": 0.13485851896447923,
      "grad_norm": 0.5336936116218567,
      "learning_rate": 9.96315359477031e-07,
      "loss": 0.3029,
      "step": 1120
    },
    {
      "epoch": 0.13606261288380495,
      "grad_norm": 0.9403670430183411,
      "learning_rate": 9.960562969452559e-07,
      "loss": 0.3019,
      "step": 1130
    },
    {
      "epoch": 0.13726670680313063,
      "grad_norm": 0.6152085661888123,
      "learning_rate": 9.957884699792604e-07,
      "loss": 0.3051,
      "step": 1140
    },
    {
      "epoch": 0.13847080072245635,
      "grad_norm": 0.7313536405563354,
      "learning_rate": 9.955118833110716e-07,
      "loss": 0.3137,
      "step": 1150
    },
    {
      "epoch": 0.13967489464178207,
      "grad_norm": 0.47397103905677795,
      "learning_rate": 9.95226541827485e-07,
      "loss": 0.3214,
      "step": 1160
    },
    {
      "epoch": 0.14087898856110775,
      "grad_norm": 0.4812333881855011,
      "learning_rate": 9.949324505699782e-07,
      "loss": 0.3164,
      "step": 1170
    },
    {
      "epoch": 0.14208308248043347,
      "grad_norm": 0.6729305386543274,
      "learning_rate": 9.946296147346215e-07,
      "loss": 0.2946,
      "step": 1180
    },
    {
      "epoch": 0.1432871763997592,
      "grad_norm": 0.6568790078163147,
      "learning_rate": 9.943180396719867e-07,
      "loss": 0.2929,
      "step": 1190
    },
    {
      "epoch": 0.1444912703190849,
      "grad_norm": 0.5633556842803955,
      "learning_rate": 9.939977308870518e-07,
      "loss": 0.3073,
      "step": 1200
    },
    {
      "epoch": 0.1456953642384106,
      "grad_norm": 1.1128957271575928,
      "learning_rate": 9.936686940391048e-07,
      "loss": 0.3264,
      "step": 1210
    },
    {
      "epoch": 0.1468994581577363,
      "grad_norm": 0.5192599892616272,
      "learning_rate": 9.933309349416428e-07,
      "loss": 0.3064,
      "step": 1220
    },
    {
      "epoch": 0.14810355207706202,
      "grad_norm": 0.49194392561912537,
      "learning_rate": 9.92984459562269e-07,
      "loss": 0.302,
      "step": 1230
    },
    {
      "epoch": 0.1493076459963877,
      "grad_norm": 0.5606468915939331,
      "learning_rate": 9.926292740225888e-07,
      "loss": 0.3037,
      "step": 1240
    },
    {
      "epoch": 0.15051173991571343,
      "grad_norm": 0.544266939163208,
      "learning_rate": 9.922653845981e-07,
      "loss": 0.3025,
      "step": 1250
    },
    {
      "epoch": 0.15171583383503914,
      "grad_norm": 1.0137197971343994,
      "learning_rate": 9.918927977180826e-07,
      "loss": 0.2998,
      "step": 1260
    },
    {
      "epoch": 0.15291992775436483,
      "grad_norm": 0.4881134629249573,
      "learning_rate": 9.91511519965486e-07,
      "loss": 0.2975,
      "step": 1270
    },
    {
      "epoch": 0.15412402167369055,
      "grad_norm": 0.4854426383972168,
      "learning_rate": 9.911215580768106e-07,
      "loss": 0.3109,
      "step": 1280
    },
    {
      "epoch": 0.15532811559301626,
      "grad_norm": 0.5056730508804321,
      "learning_rate": 9.90722918941991e-07,
      "loss": 0.3121,
      "step": 1290
    },
    {
      "epoch": 0.15653220951234195,
      "grad_norm": 0.5286668539047241,
      "learning_rate": 9.903156096042734e-07,
      "loss": 0.2982,
      "step": 1300
    },
    {
      "epoch": 0.15773630343166767,
      "grad_norm": 0.5490984916687012,
      "learning_rate": 9.898996372600903e-07,
      "loss": 0.3115,
      "step": 1310
    },
    {
      "epoch": 0.15894039735099338,
      "grad_norm": 0.614521861076355,
      "learning_rate": 9.894750092589349e-07,
      "loss": 0.2985,
      "step": 1320
    },
    {
      "epoch": 0.16014449127031907,
      "grad_norm": 0.5678403973579407,
      "learning_rate": 9.8904173310323e-07,
      "loss": 0.3046,
      "step": 1330
    },
    {
      "epoch": 0.1613485851896448,
      "grad_norm": 0.5179656147956848,
      "learning_rate": 9.885998164481966e-07,
      "loss": 0.3053,
      "step": 1340
    },
    {
      "epoch": 0.1625526791089705,
      "grad_norm": 0.526849091053009,
      "learning_rate": 9.881492671017172e-07,
      "loss": 0.3143,
      "step": 1350
    },
    {
      "epoch": 0.16375677302829622,
      "grad_norm": 0.5683344006538391,
      "learning_rate": 9.876900930241991e-07,
      "loss": 0.3031,
      "step": 1360
    },
    {
      "epoch": 0.1649608669476219,
      "grad_norm": 0.5243839621543884,
      "learning_rate": 9.872223023284333e-07,
      "loss": 0.312,
      "step": 1370
    },
    {
      "epoch": 0.16616496086694763,
      "grad_norm": 0.5260365605354309,
      "learning_rate": 9.867459032794508e-07,
      "loss": 0.3037,
      "step": 1380
    },
    {
      "epoch": 0.16736905478627334,
      "grad_norm": 0.4755154252052307,
      "learning_rate": 9.86260904294377e-07,
      "loss": 0.2916,
      "step": 1390
    },
    {
      "epoch": 0.16857314870559903,
      "grad_norm": 0.5555715560913086,
      "learning_rate": 9.857673139422833e-07,
      "loss": 0.3135,
      "step": 1400
    },
    {
      "epoch": 0.16977724262492475,
      "grad_norm": 0.5810279250144958,
      "learning_rate": 9.85265140944035e-07,
      "loss": 0.3104,
      "step": 1410
    },
    {
      "epoch": 0.17098133654425046,
      "grad_norm": 0.48022618889808655,
      "learning_rate": 9.847543941721379e-07,
      "loss": 0.3022,
      "step": 1420
    },
    {
      "epoch": 0.17218543046357615,
      "grad_norm": 0.5191965103149414,
      "learning_rate": 9.842350826505802e-07,
      "loss": 0.3018,
      "step": 1430
    },
    {
      "epoch": 0.17338952438290187,
      "grad_norm": 1.2972302436828613,
      "learning_rate": 9.837072155546753e-07,
      "loss": 0.3026,
      "step": 1440
    },
    {
      "epoch": 0.17459361830222758,
      "grad_norm": 0.47315987944602966,
      "learning_rate": 9.831708022108972e-07,
      "loss": 0.311,
      "step": 1450
    },
    {
      "epoch": 0.17579771222155327,
      "grad_norm": 0.5953189134597778,
      "learning_rate": 9.826258520967177e-07,
      "loss": 0.3071,
      "step": 1460
    },
    {
      "epoch": 0.177001806140879,
      "grad_norm": 0.5407562851905823,
      "learning_rate": 9.820723748404382e-07,
      "loss": 0.31,
      "step": 1470
    },
    {
      "epoch": 0.1782059000602047,
      "grad_norm": 0.5249618291854858,
      "learning_rate": 9.815103802210193e-07,
      "loss": 0.2898,
      "step": 1480
    },
    {
      "epoch": 0.1794099939795304,
      "grad_norm": 0.5347439646720886,
      "learning_rate": 9.80939878167908e-07,
      "loss": 0.2944,
      "step": 1490
    },
    {
      "epoch": 0.1806140878988561,
      "grad_norm": 0.49509304761886597,
      "learning_rate": 9.80360878760863e-07,
      "loss": 0.3073,
      "step": 1500
    },
    {
      "epoch": 0.18181818181818182,
      "grad_norm": 0.5182557106018066,
      "learning_rate": 9.79773392229776e-07,
      "loss": 0.3092,
      "step": 1510
    },
    {
      "epoch": 0.18302227573750754,
      "grad_norm": 0.5343918204307556,
      "learning_rate": 9.79177428954492e-07,
      "loss": 0.3058,
      "step": 1520
    },
    {
      "epoch": 0.18422636965683323,
      "grad_norm": 0.42448320984840393,
      "learning_rate": 9.785729994646228e-07,
      "loss": 0.2966,
      "step": 1530
    },
    {
      "epoch": 0.18543046357615894,
      "grad_norm": 0.514305055141449,
      "learning_rate": 9.779601144393655e-07,
      "loss": 0.3063,
      "step": 1540
    },
    {
      "epoch": 0.18663455749548466,
      "grad_norm": 0.559808075428009,
      "learning_rate": 9.773387847073102e-07,
      "loss": 0.3103,
      "step": 1550
    },
    {
      "epoch": 0.18783865141481035,
      "grad_norm": 0.5099034905433655,
      "learning_rate": 9.767090212462506e-07,
      "loss": 0.3045,
      "step": 1560
    },
    {
      "epoch": 0.18904274533413606,
      "grad_norm": 0.5309582352638245,
      "learning_rate": 9.76070835182989e-07,
      "loss": 0.3198,
      "step": 1570
    },
    {
      "epoch": 0.19024683925346178,
      "grad_norm": 0.5174340605735779,
      "learning_rate": 9.754242377931402e-07,
      "loss": 0.3019,
      "step": 1580
    },
    {
      "epoch": 0.19145093317278747,
      "grad_norm": 0.47818174958229065,
      "learning_rate": 9.747692405009327e-07,
      "loss": 0.2885,
      "step": 1590
    },
    {
      "epoch": 0.19265502709211318,
      "grad_norm": 0.4435511529445648,
      "learning_rate": 9.741058548790055e-07,
      "loss": 0.2716,
      "step": 1600
    },
    {
      "epoch": 0.1938591210114389,
      "grad_norm": 0.47226864099502563,
      "learning_rate": 9.734340926482052e-07,
      "loss": 0.2911,
      "step": 1610
    },
    {
      "epoch": 0.1950632149307646,
      "grad_norm": 0.4990203082561493,
      "learning_rate": 9.72753965677378e-07,
      "loss": 0.3119,
      "step": 1620
    },
    {
      "epoch": 0.1962673088500903,
      "grad_norm": 0.6255252957344055,
      "learning_rate": 9.7206548598316e-07,
      "loss": 0.2902,
      "step": 1630
    },
    {
      "epoch": 0.19747140276941602,
      "grad_norm": 0.5827116370201111,
      "learning_rate": 9.713686657297655e-07,
      "loss": 0.3079,
      "step": 1640
    },
    {
      "epoch": 0.1986754966887417,
      "grad_norm": 0.5475650429725647,
      "learning_rate": 9.706635172287715e-07,
      "loss": 0.3095,
      "step": 1650
    },
    {
      "epoch": 0.19987959060806743,
      "grad_norm": 0.674460768699646,
      "learning_rate": 9.699500529389001e-07,
      "loss": 0.2953,
      "step": 1660
    },
    {
      "epoch": 0.20108368452739314,
      "grad_norm": 0.5000407695770264,
      "learning_rate": 9.692282854657989e-07,
      "loss": 0.3055,
      "step": 1670
    },
    {
      "epoch": 0.20228777844671886,
      "grad_norm": 0.5063086748123169,
      "learning_rate": 9.684982275618178e-07,
      "loss": 0.2952,
      "step": 1680
    },
    {
      "epoch": 0.20349187236604455,
      "grad_norm": 0.6266674399375916,
      "learning_rate": 9.677598921257842e-07,
      "loss": 0.3028,
      "step": 1690
    },
    {
      "epoch": 0.20469596628537026,
      "grad_norm": 1.3428351879119873,
      "learning_rate": 9.67013292202775e-07,
      "loss": 0.3165,
      "step": 1700
    },
    {
      "epoch": 0.20590006020469598,
      "grad_norm": 0.6307231187820435,
      "learning_rate": 9.66258440983885e-07,
      "loss": 0.3112,
      "step": 1710
    },
    {
      "epoch": 0.20710415412402167,
      "grad_norm": 0.5176913738250732,
      "learning_rate": 9.654953518059953e-07,
      "loss": 0.3042,
      "step": 1720
    },
    {
      "epoch": 0.20830824804334738,
      "grad_norm": 0.4618211090564728,
      "learning_rate": 9.647240381515376e-07,
      "loss": 0.3107,
      "step": 1730
    },
    {
      "epoch": 0.2095123419626731,
      "grad_norm": 0.4354129135608673,
      "learning_rate": 9.639445136482546e-07,
      "loss": 0.2932,
      "step": 1740
    },
    {
      "epoch": 0.2107164358819988,
      "grad_norm": 0.6150096654891968,
      "learning_rate": 9.631567920689607e-07,
      "loss": 0.2898,
      "step": 1750
    },
    {
      "epoch": 0.2119205298013245,
      "grad_norm": 0.4629852771759033,
      "learning_rate": 9.623608873312979e-07,
      "loss": 0.2969,
      "step": 1760
    },
    {
      "epoch": 0.21312462372065022,
      "grad_norm": 0.4912186563014984,
      "learning_rate": 9.615568134974902e-07,
      "loss": 0.3037,
      "step": 1770
    },
    {
      "epoch": 0.2143287176399759,
      "grad_norm": 0.5452593564987183,
      "learning_rate": 9.607445847740946e-07,
      "loss": 0.3011,
      "step": 1780
    },
    {
      "epoch": 0.21553281155930162,
      "grad_norm": 0.5524305701255798,
      "learning_rate": 9.599242155117514e-07,
      "loss": 0.3056,
      "step": 1790
    },
    {
      "epoch": 0.21673690547862734,
      "grad_norm": 0.4734737277030945,
      "learning_rate": 9.590957202049288e-07,
      "loss": 0.2937,
      "step": 1800
    },
    {
      "epoch": 0.21794099939795303,
      "grad_norm": 0.5050627589225769,
      "learning_rate": 9.582591134916683e-07,
      "loss": 0.2964,
      "step": 1810
    },
    {
      "epoch": 0.21914509331727874,
      "grad_norm": 0.5784972310066223,
      "learning_rate": 9.574144101533258e-07,
      "loss": 0.3126,
      "step": 1820
    },
    {
      "epoch": 0.22034918723660446,
      "grad_norm": 0.67679762840271,
      "learning_rate": 9.565616251143093e-07,
      "loss": 0.2997,
      "step": 1830
    },
    {
      "epoch": 0.22155328115593018,
      "grad_norm": 0.730844259262085,
      "learning_rate": 9.55700773441817e-07,
      "loss": 0.2992,
      "step": 1840
    },
    {
      "epoch": 0.22275737507525586,
      "grad_norm": 0.511701226234436,
      "learning_rate": 9.5483187034557e-07,
      "loss": 0.2843,
      "step": 1850
    },
    {
      "epoch": 0.22396146899458158,
      "grad_norm": 0.49653661251068115,
      "learning_rate": 9.539549311775434e-07,
      "loss": 0.3003,
      "step": 1860
    },
    {
      "epoch": 0.2251655629139073,
      "grad_norm": 0.479397714138031,
      "learning_rate": 9.530699714316955e-07,
      "loss": 0.3007,
      "step": 1870
    },
    {
      "epoch": 0.22636965683323299,
      "grad_norm": 0.5917854905128479,
      "learning_rate": 9.521770067436944e-07,
      "loss": 0.2818,
      "step": 1880
    },
    {
      "epoch": 0.2275737507525587,
      "grad_norm": 0.4750485420227051,
      "learning_rate": 9.512760528906409e-07,
      "loss": 0.3107,
      "step": 1890
    },
    {
      "epoch": 0.22877784467188442,
      "grad_norm": 0.5081465244293213,
      "learning_rate": 9.503671257907905e-07,
      "loss": 0.3003,
      "step": 1900
    },
    {
      "epoch": 0.2299819385912101,
      "grad_norm": 0.7816819548606873,
      "learning_rate": 9.494502415032714e-07,
      "loss": 0.2898,
      "step": 1910
    },
    {
      "epoch": 0.23118603251053582,
      "grad_norm": 0.600690484046936,
      "learning_rate": 9.485254162278013e-07,
      "loss": 0.2975,
      "step": 1920
    },
    {
      "epoch": 0.23239012642986154,
      "grad_norm": 0.6016291379928589,
      "learning_rate": 9.475926663044016e-07,
      "loss": 0.2895,
      "step": 1930
    },
    {
      "epoch": 0.23359422034918723,
      "grad_norm": 0.5959491729736328,
      "learning_rate": 9.466520082131074e-07,
      "loss": 0.293,
      "step": 1940
    },
    {
      "epoch": 0.23479831426851294,
      "grad_norm": 0.5337576270103455,
      "learning_rate": 9.457034585736776e-07,
      "loss": 0.2954,
      "step": 1950
    },
    {
      "epoch": 0.23600240818783866,
      "grad_norm": 0.5701966881752014,
      "learning_rate": 9.447470341453003e-07,
      "loss": 0.3016,
      "step": 1960
    },
    {
      "epoch": 0.23720650210716435,
      "grad_norm": 0.48122677206993103,
      "learning_rate": 9.437827518262976e-07,
      "loss": 0.2834,
      "step": 1970
    },
    {
      "epoch": 0.23841059602649006,
      "grad_norm": 0.6107509732246399,
      "learning_rate": 9.428106286538263e-07,
      "loss": 0.2865,
      "step": 1980
    },
    {
      "epoch": 0.23961468994581578,
      "grad_norm": 0.4537561237812042,
      "learning_rate": 9.418306818035773e-07,
      "loss": 0.2981,
      "step": 1990
    },
    {
      "epoch": 0.2408187838651415,
      "grad_norm": 0.6205712556838989,
      "learning_rate": 9.408429285894721e-07,
      "loss": 0.3099,
      "step": 2000
    },
    {
      "epoch": 0.24202287778446718,
      "grad_norm": 0.4940670132637024,
      "learning_rate": 9.398473864633564e-07,
      "loss": 0.2942,
      "step": 2010
    },
    {
      "epoch": 0.2432269717037929,
      "grad_norm": 0.45464888215065,
      "learning_rate": 9.388440730146923e-07,
      "loss": 0.2875,
      "step": 2020
    },
    {
      "epoch": 0.24443106562311862,
      "grad_norm": 0.4339371919631958,
      "learning_rate": 9.378330059702479e-07,
      "loss": 0.284,
      "step": 2030
    },
    {
      "epoch": 0.2456351595424443,
      "grad_norm": 0.6798887848854065,
      "learning_rate": 9.368142031937826e-07,
      "loss": 0.3079,
      "step": 2040
    },
    {
      "epoch": 0.24683925346177002,
      "grad_norm": 0.504805326461792,
      "learning_rate": 9.357876826857334e-07,
      "loss": 0.2942,
      "step": 2050
    },
    {
      "epoch": 0.24804334738109574,
      "grad_norm": 1.0256134271621704,
      "learning_rate": 9.347534625828955e-07,
      "loss": 0.2958,
      "step": 2060
    },
    {
      "epoch": 0.24924744130042142,
      "grad_norm": 0.7034043073654175,
      "learning_rate": 9.337115611581019e-07,
      "loss": 0.2977,
      "step": 2070
    },
    {
      "epoch": 0.25045153521974717,
      "grad_norm": 0.6767880916595459,
      "learning_rate": 9.326619968199016e-07,
      "loss": 0.2843,
      "step": 2080
    },
    {
      "epoch": 0.25165562913907286,
      "grad_norm": 0.5257042050361633,
      "learning_rate": 9.316047881122334e-07,
      "loss": 0.2869,
      "step": 2090
    },
    {
      "epoch": 0.25285972305839854,
      "grad_norm": 0.5919986963272095,
      "learning_rate": 9.305399537140983e-07,
      "loss": 0.3009,
      "step": 2100
    },
    {
      "epoch": 0.2540638169777243,
      "grad_norm": 0.5936114192008972,
      "learning_rate": 9.294675124392302e-07,
      "loss": 0.2863,
      "step": 2110
    },
    {
      "epoch": 0.25526791089705,
      "grad_norm": 1.1754176616668701,
      "learning_rate": 9.283874832357625e-07,
      "loss": 0.2808,
      "step": 2120
    },
    {
      "epoch": 0.25647200481637566,
      "grad_norm": 0.6144666075706482,
      "learning_rate": 9.272998851858943e-07,
      "loss": 0.2854,
      "step": 2130
    },
    {
      "epoch": 0.2576760987357014,
      "grad_norm": 0.47984328866004944,
      "learning_rate": 9.262047375055524e-07,
      "loss": 0.2978,
      "step": 2140
    },
    {
      "epoch": 0.2588801926550271,
      "grad_norm": 0.6158226728439331,
      "learning_rate": 9.251020595440524e-07,
      "loss": 0.3072,
      "step": 2150
    },
    {
      "epoch": 0.2600842865743528,
      "grad_norm": 0.6357386708259583,
      "learning_rate": 9.239918707837564e-07,
      "loss": 0.2927,
      "step": 2160
    },
    {
      "epoch": 0.26128838049367853,
      "grad_norm": 0.6893799901008606,
      "learning_rate": 9.228741908397293e-07,
      "loss": 0.2988,
      "step": 2170
    },
    {
      "epoch": 0.2624924744130042,
      "grad_norm": 0.5763195157051086,
      "learning_rate": 9.217490394593914e-07,
      "loss": 0.3049,
      "step": 2180
    },
    {
      "epoch": 0.2636965683323299,
      "grad_norm": 0.5649781823158264,
      "learning_rate": 9.206164365221706e-07,
      "loss": 0.3083,
      "step": 2190
    },
    {
      "epoch": 0.26490066225165565,
      "grad_norm": 0.4519605040550232,
      "learning_rate": 9.194764020391506e-07,
      "loss": 0.274,
      "step": 2200
    },
    {
      "epoch": 0.26610475617098134,
      "grad_norm": 0.5203403830528259,
      "learning_rate": 9.183289561527164e-07,
      "loss": 0.2823,
      "step": 2210
    },
    {
      "epoch": 0.267308850090307,
      "grad_norm": 0.525934100151062,
      "learning_rate": 9.171741191362005e-07,
      "loss": 0.2928,
      "step": 2220
    },
    {
      "epoch": 0.26851294400963277,
      "grad_norm": 0.5151864290237427,
      "learning_rate": 9.160119113935227e-07,
      "loss": 0.2914,
      "step": 2230
    },
    {
      "epoch": 0.26971703792895846,
      "grad_norm": 0.663339376449585,
      "learning_rate": 9.14842353458831e-07,
      "loss": 0.301,
      "step": 2240
    },
    {
      "epoch": 0.27092113184828415,
      "grad_norm": 0.5526972413063049,
      "learning_rate": 9.136654659961381e-07,
      "loss": 0.2931,
      "step": 2250
    },
    {
      "epoch": 0.2721252257676099,
      "grad_norm": 0.6518740057945251,
      "learning_rate": 9.12481269798956e-07,
      "loss": 0.2772,
      "step": 2260
    },
    {
      "epoch": 0.2733293196869356,
      "grad_norm": 0.5191295742988586,
      "learning_rate": 9.112897857899298e-07,
      "loss": 0.2933,
      "step": 2270
    },
    {
      "epoch": 0.27453341360626127,
      "grad_norm": 1.087936282157898,
      "learning_rate": 9.100910350204669e-07,
      "loss": 0.2956,
      "step": 2280
    },
    {
      "epoch": 0.275737507525587,
      "grad_norm": 0.5870952010154724,
      "learning_rate": 9.088850386703653e-07,
      "loss": 0.2857,
      "step": 2290
    },
    {
      "epoch": 0.2769416014449127,
      "grad_norm": 0.5123207569122314,
      "learning_rate": 9.076718180474399e-07,
      "loss": 0.3005,
      "step": 2300
    },
    {
      "epoch": 0.2781456953642384,
      "grad_norm": 0.47658002376556396,
      "learning_rate": 9.064513945871457e-07,
      "loss": 0.2889,
      "step": 2310
    },
    {
      "epoch": 0.27934978928356413,
      "grad_norm": 0.564738929271698,
      "learning_rate": 9.052237898521984e-07,
      "loss": 0.2929,
      "step": 2320
    },
    {
      "epoch": 0.2805538832028898,
      "grad_norm": 0.47116583585739136,
      "learning_rate": 9.03989025532195e-07,
      "loss": 0.2942,
      "step": 2330
    },
    {
      "epoch": 0.2817579771222155,
      "grad_norm": 0.5838178396224976,
      "learning_rate": 9.027471234432292e-07,
      "loss": 0.2883,
      "step": 2340
    },
    {
      "epoch": 0.28296207104154125,
      "grad_norm": 0.48679229617118835,
      "learning_rate": 9.014981055275059e-07,
      "loss": 0.29,
      "step": 2350
    },
    {
      "epoch": 0.28416616496086694,
      "grad_norm": 0.5863898992538452,
      "learning_rate": 9.00241993852955e-07,
      "loss": 0.2871,
      "step": 2360
    },
    {
      "epoch": 0.28537025888019263,
      "grad_norm": 0.5949921607971191,
      "learning_rate": 8.989788106128402e-07,
      "loss": 0.2927,
      "step": 2370
    },
    {
      "epoch": 0.2865743527995184,
      "grad_norm": 0.42538484930992126,
      "learning_rate": 8.977085781253668e-07,
      "loss": 0.2825,
      "step": 2380
    },
    {
      "epoch": 0.28777844671884406,
      "grad_norm": 0.5678000450134277,
      "learning_rate": 8.964313188332881e-07,
      "loss": 0.294,
      "step": 2390
    },
    {
      "epoch": 0.2889825406381698,
      "grad_norm": 0.5283777713775635,
      "learning_rate": 8.951470553035086e-07,
      "loss": 0.286,
      "step": 2400
    },
    {
      "epoch": 0.2901866345574955,
      "grad_norm": 0.8639681935310364,
      "learning_rate": 8.938558102266851e-07,
      "loss": 0.2971,
      "step": 2410
    },
    {
      "epoch": 0.2913907284768212,
      "grad_norm": 0.5353107452392578,
      "learning_rate": 8.925576064168261e-07,
      "loss": 0.3038,
      "step": 2420
    },
    {
      "epoch": 0.2925948223961469,
      "grad_norm": 0.5691916346549988,
      "learning_rate": 8.912524668108885e-07,
      "loss": 0.2901,
      "step": 2430
    },
    {
      "epoch": 0.2937989163154726,
      "grad_norm": 0.5999578833580017,
      "learning_rate": 8.899404144683724e-07,
      "loss": 0.2864,
      "step": 2440
    },
    {
      "epoch": 0.2950030102347983,
      "grad_norm": 0.6660271883010864,
      "learning_rate": 8.886214725709136e-07,
      "loss": 0.2866,
      "step": 2450
    },
    {
      "epoch": 0.29620710415412405,
      "grad_norm": 0.5501262545585632,
      "learning_rate": 8.872956644218742e-07,
      "loss": 0.2909,
      "step": 2460
    },
    {
      "epoch": 0.29741119807344973,
      "grad_norm": 0.44489532709121704,
      "learning_rate": 8.859630134459308e-07,
      "loss": 0.2869,
      "step": 2470
    },
    {
      "epoch": 0.2986152919927754,
      "grad_norm": 0.619097113609314,
      "learning_rate": 8.846235431886604e-07,
      "loss": 0.2782,
      "step": 2480
    },
    {
      "epoch": 0.29981938591210117,
      "grad_norm": 0.49712878465652466,
      "learning_rate": 8.832772773161251e-07,
      "loss": 0.2848,
      "step": 2490
    },
    {
      "epoch": 0.30102347983142685,
      "grad_norm": 0.46963346004486084,
      "learning_rate": 8.819242396144529e-07,
      "loss": 0.2915,
      "step": 2500
    },
    {
      "epoch": 0.30222757375075254,
      "grad_norm": 0.5881354212760925,
      "learning_rate": 8.805644539894181e-07,
      "loss": 0.2969,
      "step": 2510
    },
    {
      "epoch": 0.3034316676700783,
      "grad_norm": 0.5345028042793274,
      "learning_rate": 8.791979444660193e-07,
      "loss": 0.2985,
      "step": 2520
    },
    {
      "epoch": 0.304635761589404,
      "grad_norm": 0.5038124322891235,
      "learning_rate": 8.778247351880536e-07,
      "loss": 0.2931,
      "step": 2530
    },
    {
      "epoch": 0.30583985550872966,
      "grad_norm": 0.6723479628562927,
      "learning_rate": 8.764448504176919e-07,
      "loss": 0.2885,
      "step": 2540
    },
    {
      "epoch": 0.3070439494280554,
      "grad_norm": 0.474516361951828,
      "learning_rate": 8.750583145350483e-07,
      "loss": 0.2906,
      "step": 2550
    },
    {
      "epoch": 0.3082480433473811,
      "grad_norm": 0.509379506111145,
      "learning_rate": 8.736651520377507e-07,
      "loss": 0.2874,
      "step": 2560
    },
    {
      "epoch": 0.3094521372667068,
      "grad_norm": 0.9317507743835449,
      "learning_rate": 8.722653875405075e-07,
      "loss": 0.2891,
      "step": 2570
    },
    {
      "epoch": 0.3106562311860325,
      "grad_norm": 0.4634588360786438,
      "learning_rate": 8.708590457746727e-07,
      "loss": 0.284,
      "step": 2580
    },
    {
      "epoch": 0.3118603251053582,
      "grad_norm": 0.4674171209335327,
      "learning_rate": 8.694461515878088e-07,
      "loss": 0.2851,
      "step": 2590
    },
    {
      "epoch": 0.3130644190246839,
      "grad_norm": 0.4606451988220215,
      "learning_rate": 8.68026729943248e-07,
      "loss": 0.282,
      "step": 2600
    },
    {
      "epoch": 0.31426851294400965,
      "grad_norm": 0.5793256163597107,
      "learning_rate": 8.666008059196513e-07,
      "loss": 0.2852,
      "step": 2610
    },
    {
      "epoch": 0.31547260686333534,
      "grad_norm": 0.742026686668396,
      "learning_rate": 8.65168404710565e-07,
      "loss": 0.2909,
      "step": 2620
    },
    {
      "epoch": 0.316676700782661,
      "grad_norm": 0.469868928194046,
      "learning_rate": 8.637295516239757e-07,
      "loss": 0.2784,
      "step": 2630
    },
    {
      "epoch": 0.31788079470198677,
      "grad_norm": 0.6895257234573364,
      "learning_rate": 8.622842720818635e-07,
      "loss": 0.2849,
      "step": 2640
    },
    {
      "epoch": 0.31908488862131246,
      "grad_norm": 0.6843047142028809,
      "learning_rate": 8.608325916197524e-07,
      "loss": 0.2969,
      "step": 2650
    },
    {
      "epoch": 0.32028898254063815,
      "grad_norm": 2.822052240371704,
      "learning_rate": 8.593745358862592e-07,
      "loss": 0.2954,
      "step": 2660
    },
    {
      "epoch": 0.3214930764599639,
      "grad_norm": 0.5745678544044495,
      "learning_rate": 8.579101306426406e-07,
      "loss": 0.3005,
      "step": 2670
    },
    {
      "epoch": 0.3226971703792896,
      "grad_norm": 0.4625186026096344,
      "learning_rate": 8.564394017623378e-07,
      "loss": 0.2889,
      "step": 2680
    },
    {
      "epoch": 0.32390126429861527,
      "grad_norm": 0.5813141465187073,
      "learning_rate": 8.549623752305192e-07,
      "loss": 0.2926,
      "step": 2690
    },
    {
      "epoch": 0.325105358217941,
      "grad_norm": 0.49706658720970154,
      "learning_rate": 8.534790771436222e-07,
      "loss": 0.2884,
      "step": 2700
    },
    {
      "epoch": 0.3263094521372667,
      "grad_norm": 0.5477120280265808,
      "learning_rate": 8.519895337088907e-07,
      "loss": 0.2922,
      "step": 2710
    },
    {
      "epoch": 0.32751354605659244,
      "grad_norm": 1.157457709312439,
      "learning_rate": 8.504937712439131e-07,
      "loss": 0.2699,
      "step": 2720
    },
    {
      "epoch": 0.32871763997591813,
      "grad_norm": 0.5263344049453735,
      "learning_rate": 8.48991816176157e-07,
      "loss": 0.2888,
      "step": 2730
    },
    {
      "epoch": 0.3299217338952438,
      "grad_norm": 0.764481782913208,
      "learning_rate": 8.474836950425026e-07,
      "loss": 0.292,
      "step": 2740
    },
    {
      "epoch": 0.33112582781456956,
      "grad_norm": 0.5704035758972168,
      "learning_rate": 8.459694344887731e-07,
      "loss": 0.2928,
      "step": 2750
    },
    {
      "epoch": 0.33232992173389525,
      "grad_norm": 0.46473219990730286,
      "learning_rate": 8.444490612692645e-07,
      "loss": 0.2816,
      "step": 2760
    },
    {
      "epoch": 0.33353401565322094,
      "grad_norm": 0.5250662565231323,
      "learning_rate": 8.429226022462728e-07,
      "loss": 0.2881,
      "step": 2770
    },
    {
      "epoch": 0.3347381095725467,
      "grad_norm": 0.6085227727890015,
      "learning_rate": 8.413900843896193e-07,
      "loss": 0.3122,
      "step": 2780
    },
    {
      "epoch": 0.33594220349187237,
      "grad_norm": 0.7203246355056763,
      "learning_rate": 8.398515347761745e-07,
      "loss": 0.2911,
      "step": 2790
    },
    {
      "epoch": 0.33714629741119806,
      "grad_norm": 0.5305497050285339,
      "learning_rate": 8.383069805893784e-07,
      "loss": 0.2888,
      "step": 2800
    },
    {
      "epoch": 0.3383503913305238,
      "grad_norm": 0.5452449917793274,
      "learning_rate": 8.367564491187622e-07,
      "loss": 0.2866,
      "step": 2810
    },
    {
      "epoch": 0.3395544852498495,
      "grad_norm": 0.4815659523010254,
      "learning_rate": 8.351999677594645e-07,
      "loss": 0.2863,
      "step": 2820
    },
    {
      "epoch": 0.3407585791691752,
      "grad_norm": 0.5499128103256226,
      "learning_rate": 8.336375640117481e-07,
      "loss": 0.2865,
      "step": 2830
    },
    {
      "epoch": 0.3419626730885009,
      "grad_norm": 0.559804379940033,
      "learning_rate": 8.320692654805136e-07,
      "loss": 0.2833,
      "step": 2840
    },
    {
      "epoch": 0.3431667670078266,
      "grad_norm": 0.5070551633834839,
      "learning_rate": 8.304950998748124e-07,
      "loss": 0.2969,
      "step": 2850
    },
    {
      "epoch": 0.3443708609271523,
      "grad_norm": 0.5566725730895996,
      "learning_rate": 8.289150950073564e-07,
      "loss": 0.2814,
      "step": 2860
    },
    {
      "epoch": 0.34557495484647804,
      "grad_norm": 0.5421969890594482,
      "learning_rate": 8.273292787940268e-07,
      "loss": 0.2805,
      "step": 2870
    },
    {
      "epoch": 0.34677904876580373,
      "grad_norm": 0.49686506390571594,
      "learning_rate": 8.257376792533813e-07,
      "loss": 0.2872,
      "step": 2880
    },
    {
      "epoch": 0.3479831426851294,
      "grad_norm": 0.4665164649486542,
      "learning_rate": 8.241403245061584e-07,
      "loss": 0.2816,
      "step": 2890
    },
    {
      "epoch": 0.34918723660445516,
      "grad_norm": 0.4437556266784668,
      "learning_rate": 8.225372427747813e-07,
      "loss": 0.286,
      "step": 2900
    },
    {
      "epoch": 0.35039133052378085,
      "grad_norm": 0.5280335545539856,
      "learning_rate": 8.209284623828583e-07,
      "loss": 0.2895,
      "step": 2910
    },
    {
      "epoch": 0.35159542444310654,
      "grad_norm": 0.5298367142677307,
      "learning_rate": 8.193140117546832e-07,
      "loss": 0.282,
      "step": 2920
    },
    {
      "epoch": 0.3527995183624323,
      "grad_norm": 0.7123149633407593,
      "learning_rate": 8.176939194147329e-07,
      "loss": 0.2841,
      "step": 2930
    },
    {
      "epoch": 0.354003612281758,
      "grad_norm": 0.6565315127372742,
      "learning_rate": 8.160682139871632e-07,
      "loss": 0.2793,
      "step": 2940
    },
    {
      "epoch": 0.35520770620108366,
      "grad_norm": 0.7005172967910767,
      "learning_rate": 8.144369241953032e-07,
      "loss": 0.2854,
      "step": 2950
    },
    {
      "epoch": 0.3564118001204094,
      "grad_norm": 0.7468757033348083,
      "learning_rate": 8.128000788611478e-07,
      "loss": 0.2992,
      "step": 2960
    },
    {
      "epoch": 0.3576158940397351,
      "grad_norm": 0.5055456161499023,
      "learning_rate": 8.111577069048487e-07,
      "loss": 0.2979,
      "step": 2970
    },
    {
      "epoch": 0.3588199879590608,
      "grad_norm": 0.576806366443634,
      "learning_rate": 8.095098373442027e-07,
      "loss": 0.2915,
      "step": 2980
    },
    {
      "epoch": 0.3600240818783865,
      "grad_norm": 0.5598990321159363,
      "learning_rate": 8.078564992941401e-07,
      "loss": 0.2741,
      "step": 2990
    },
    {
      "epoch": 0.3612281757977122,
      "grad_norm": 0.5614596009254456,
      "learning_rate": 8.061977219662092e-07,
      "loss": 0.2913,
      "step": 3000
    },
    {
      "epoch": 0.3624322697170379,
      "grad_norm": 0.37974095344543457,
      "learning_rate": 8.045335346680611e-07,
      "loss": 0.2787,
      "step": 3010
    },
    {
      "epoch": 0.36363636363636365,
      "grad_norm": 0.6439441442489624,
      "learning_rate": 8.028639668029309e-07,
      "loss": 0.2868,
      "step": 3020
    },
    {
      "epoch": 0.36484045755568933,
      "grad_norm": 0.46323299407958984,
      "learning_rate": 8.011890478691196e-07,
      "loss": 0.2831,
      "step": 3030
    },
    {
      "epoch": 0.3660445514750151,
      "grad_norm": 0.4963575005531311,
      "learning_rate": 7.995088074594713e-07,
      "loss": 0.2782,
      "step": 3040
    },
    {
      "epoch": 0.36724864539434077,
      "grad_norm": 0.6179429888725281,
      "learning_rate": 7.978232752608516e-07,
      "loss": 0.2703,
      "step": 3050
    },
    {
      "epoch": 0.36845273931366646,
      "grad_norm": 0.5127160549163818,
      "learning_rate": 7.961324810536223e-07,
      "loss": 0.3007,
      "step": 3060
    },
    {
      "epoch": 0.3696568332329922,
      "grad_norm": 0.45177775621414185,
      "learning_rate": 7.94436454711116e-07,
      "loss": 0.288,
      "step": 3070
    },
    {
      "epoch": 0.3708609271523179,
      "grad_norm": 0.47144508361816406,
      "learning_rate": 7.927352261991074e-07,
      "loss": 0.2901,
      "step": 3080
    },
    {
      "epoch": 0.3720650210716436,
      "grad_norm": 0.5511527061462402,
      "learning_rate": 7.910288255752844e-07,
      "loss": 0.2754,
      "step": 3090
    },
    {
      "epoch": 0.3732691149909693,
      "grad_norm": 0.5164305567741394,
      "learning_rate": 7.893172829887171e-07,
      "loss": 0.2847,
      "step": 3100
    },
    {
      "epoch": 0.374473208910295,
      "grad_norm": 0.5629504919052124,
      "learning_rate": 7.876006286793251e-07,
      "loss": 0.2953,
      "step": 3110
    },
    {
      "epoch": 0.3756773028296207,
      "grad_norm": 0.513200044631958,
      "learning_rate": 7.858788929773422e-07,
      "loss": 0.2702,
      "step": 3120
    },
    {
      "epoch": 0.37688139674894644,
      "grad_norm": 0.504371166229248,
      "learning_rate": 7.841521063027825e-07,
      "loss": 0.2873,
      "step": 3130
    },
    {
      "epoch": 0.37808549066827213,
      "grad_norm": 0.613593578338623,
      "learning_rate": 7.824202991649013e-07,
      "loss": 0.27,
      "step": 3140
    },
    {
      "epoch": 0.3792895845875978,
      "grad_norm": 0.7345304489135742,
      "learning_rate": 7.806835021616564e-07,
      "loss": 0.2895,
      "step": 3150
    },
    {
      "epoch": 0.38049367850692356,
      "grad_norm": 0.48514464497566223,
      "learning_rate": 7.789417459791681e-07,
      "loss": 0.2809,
      "step": 3160
    },
    {
      "epoch": 0.38169777242624925,
      "grad_norm": 0.4638960063457489,
      "learning_rate": 7.77195061391176e-07,
      "loss": 0.2839,
      "step": 3170
    },
    {
      "epoch": 0.38290186634557494,
      "grad_norm": 0.5008341073989868,
      "learning_rate": 7.754434792584968e-07,
      "loss": 0.2701,
      "step": 3180
    },
    {
      "epoch": 0.3841059602649007,
      "grad_norm": 0.5258957743644714,
      "learning_rate": 7.73687030528477e-07,
      "loss": 0.2709,
      "step": 3190
    },
    {
      "epoch": 0.38531005418422637,
      "grad_norm": 0.5781968832015991,
      "learning_rate": 7.719257462344481e-07,
      "loss": 0.2994,
      "step": 3200
    },
    {
      "epoch": 0.38651414810355206,
      "grad_norm": 0.5485130548477173,
      "learning_rate": 7.701596574951771e-07,
      "loss": 0.3001,
      "step": 3210
    },
    {
      "epoch": 0.3877182420228778,
      "grad_norm": 0.4708418846130371,
      "learning_rate": 7.683887955143169e-07,
      "loss": 0.2736,
      "step": 3220
    },
    {
      "epoch": 0.3889223359422035,
      "grad_norm": 0.5321612358093262,
      "learning_rate": 7.666131915798556e-07,
      "loss": 0.2892,
      "step": 3230
    },
    {
      "epoch": 0.3901264298615292,
      "grad_norm": 0.524898111820221,
      "learning_rate": 7.648328770635623e-07,
      "loss": 0.2897,
      "step": 3240
    },
    {
      "epoch": 0.3913305237808549,
      "grad_norm": 0.4973953664302826,
      "learning_rate": 7.630478834204351e-07,
      "loss": 0.2804,
      "step": 3250
    },
    {
      "epoch": 0.3925346177001806,
      "grad_norm": 0.5439997315406799,
      "learning_rate": 7.612582421881423e-07,
      "loss": 0.2824,
      "step": 3260
    },
    {
      "epoch": 0.3937387116195063,
      "grad_norm": 0.5040695667266846,
      "learning_rate": 7.594639849864681e-07,
      "loss": 0.2806,
      "step": 3270
    },
    {
      "epoch": 0.39494280553883204,
      "grad_norm": 0.57867830991745,
      "learning_rate": 7.576651435167523e-07,
      "loss": 0.2788,
      "step": 3280
    },
    {
      "epoch": 0.39614689945815773,
      "grad_norm": 0.43785402178764343,
      "learning_rate": 7.558617495613304e-07,
      "loss": 0.272,
      "step": 3290
    },
    {
      "epoch": 0.3973509933774834,
      "grad_norm": 0.6042655110359192,
      "learning_rate": 7.540538349829725e-07,
      "loss": 0.2918,
      "step": 3300
    },
    {
      "epoch": 0.39855508729680916,
      "grad_norm": 0.6529451012611389,
      "learning_rate": 7.522414317243198e-07,
      "loss": 0.2882,
      "step": 3310
    },
    {
      "epoch": 0.39975918121613485,
      "grad_norm": 0.5043284296989441,
      "learning_rate": 7.50424571807321e-07,
      "loss": 0.2859,
      "step": 3320
    },
    {
      "epoch": 0.40096327513546054,
      "grad_norm": 0.44874584674835205,
      "learning_rate": 7.486032873326656e-07,
      "loss": 0.2912,
      "step": 3330
    },
    {
      "epoch": 0.4021673690547863,
      "grad_norm": 0.515211284160614,
      "learning_rate": 7.467776104792171e-07,
      "loss": 0.2747,
      "step": 3340
    },
    {
      "epoch": 0.40337146297411197,
      "grad_norm": 0.5425666570663452,
      "learning_rate": 7.449475735034453e-07,
      "loss": 0.2964,
      "step": 3350
    },
    {
      "epoch": 0.4045755568934377,
      "grad_norm": 0.5557084083557129,
      "learning_rate": 7.431132087388546e-07,
      "loss": 0.2809,
      "step": 3360
    },
    {
      "epoch": 0.4057796508127634,
      "grad_norm": 0.4438600540161133,
      "learning_rate": 7.412745485954144e-07,
      "loss": 0.269,
      "step": 3370
    },
    {
      "epoch": 0.4069837447320891,
      "grad_norm": 0.586608350276947,
      "learning_rate": 7.394316255589854e-07,
| "loss": 0.2848, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 0.40818783865141484, | |
| "grad_norm": 0.6429834961891174, | |
| "learning_rate": 7.375844721907466e-07, | |
| "loss": 0.2917, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 0.4093919325707405, | |
| "grad_norm": 0.5150188207626343, | |
| "learning_rate": 7.35733121126619e-07, | |
| "loss": 0.2772, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.4105960264900662, | |
| "grad_norm": 0.5537393093109131, | |
| "learning_rate": 7.338776050766896e-07, | |
| "loss": 0.2819, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 0.41180012040939196, | |
| "grad_norm": 0.4834784269332886, | |
| "learning_rate": 7.320179568246333e-07, | |
| "loss": 0.2851, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 0.41300421432871764, | |
| "grad_norm": 0.6806831955909729, | |
| "learning_rate": 7.301542092271337e-07, | |
| "loss": 0.2841, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 0.41420830824804333, | |
| "grad_norm": 0.5081019997596741, | |
| "learning_rate": 7.282863952133022e-07, | |
| "loss": 0.2763, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 0.4154124021673691, | |
| "grad_norm": 0.5681424140930176, | |
| "learning_rate": 7.264145477840974e-07, | |
| "loss": 0.2719, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.41661649608669477, | |
| "grad_norm": 0.6257504820823669, | |
| "learning_rate": 7.245387000117404e-07, | |
| "loss": 0.2813, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 0.41782059000602045, | |
| "grad_norm": 0.5195356607437134, | |
| "learning_rate": 7.226588850391317e-07, | |
| "loss": 0.2761, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 0.4190246839253462, | |
| "grad_norm": 0.5490323305130005, | |
| "learning_rate": 7.207751360792647e-07, | |
| "loss": 0.291, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 0.4202287778446719, | |
| "grad_norm": 0.6458017230033875, | |
| "learning_rate": 7.188874864146397e-07, | |
| "loss": 0.2919, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 0.4214328717639976, | |
| "grad_norm": 0.5081551671028137, | |
| "learning_rate": 7.16995969396676e-07, | |
| "loss": 0.2762, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.4226369656833233, | |
| "grad_norm": 0.6496263742446899, | |
| "learning_rate": 7.151006184451212e-07, | |
| "loss": 0.2766, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 0.423841059602649, | |
| "grad_norm": 0.6383594870567322, | |
| "learning_rate": 7.132014670474625e-07, | |
| "loss": 0.2829, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 0.4250451535219747, | |
| "grad_norm": 0.6374247074127197, | |
| "learning_rate": 7.112985487583333e-07, | |
| "loss": 0.2776, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 0.42624924744130044, | |
| "grad_norm": 0.48250874876976013, | |
| "learning_rate": 7.093918971989229e-07, | |
| "loss": 0.2794, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 0.4274533413606261, | |
| "grad_norm": 0.5055521726608276, | |
| "learning_rate": 7.07481546056379e-07, | |
| "loss": 0.2818, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.4286574352799518, | |
| "grad_norm": 0.558320164680481, | |
| "learning_rate": 7.055675290832157e-07, | |
| "loss": 0.29, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 0.42986152919927756, | |
| "grad_norm": 0.54196697473526, | |
| "learning_rate": 7.036498800967153e-07, | |
| "loss": 0.2819, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 0.43106562311860325, | |
| "grad_norm": 0.5442371368408203, | |
| "learning_rate": 7.017286329783314e-07, | |
| "loss": 0.3044, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 0.43226971703792894, | |
| "grad_norm": 0.531579315662384, | |
| "learning_rate": 6.9980382167309e-07, | |
| "loss": 0.2875, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 0.4334738109572547, | |
| "grad_norm": 0.6069034934043884, | |
| "learning_rate": 6.978754801889902e-07, | |
| "loss": 0.2915, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.43467790487658037, | |
| "grad_norm": 0.5376235246658325, | |
| "learning_rate": 6.959436425964033e-07, | |
| "loss": 0.2768, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 0.43588199879590606, | |
| "grad_norm": 0.5438763499259949, | |
| "learning_rate": 6.9400834302747e-07, | |
| "loss": 0.2911, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 0.4370860927152318, | |
| "grad_norm": 0.4325105547904968, | |
| "learning_rate": 6.920696156754985e-07, | |
| "loss": 0.269, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 0.4382901866345575, | |
| "grad_norm": 0.5107905864715576, | |
| "learning_rate": 6.901274947943597e-07, | |
| "loss": 0.2754, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 0.4394942805538832, | |
| "grad_norm": 0.5302306413650513, | |
| "learning_rate": 6.881820146978822e-07, | |
| "loss": 0.2835, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.4406983744732089, | |
| "grad_norm": 0.5489309430122375, | |
| "learning_rate": 6.862332097592457e-07, | |
| "loss": 0.2746, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 0.4419024683925346, | |
| "grad_norm": 0.4515032172203064, | |
| "learning_rate": 6.842811144103743e-07, | |
| "loss": 0.2829, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 0.44310656231186035, | |
| "grad_norm": 0.5359588861465454, | |
| "learning_rate": 6.823257631413275e-07, | |
| "loss": 0.2826, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 0.44431065623118604, | |
| "grad_norm": 0.49561506509780884, | |
| "learning_rate": 6.803671904996916e-07, | |
| "loss": 0.2946, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 0.44551475015051173, | |
| "grad_norm": 0.43841075897216797, | |
| "learning_rate": 6.784054310899683e-07, | |
| "loss": 0.2802, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.4467188440698375, | |
| "grad_norm": 0.7528261542320251, | |
| "learning_rate": 6.764405195729639e-07, | |
| "loss": 0.2829, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 0.44792293798916316, | |
| "grad_norm": 1.1440777778625488, | |
| "learning_rate": 6.744724906651774e-07, | |
| "loss": 0.2665, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 0.44912703190848885, | |
| "grad_norm": 0.5153807997703552, | |
| "learning_rate": 6.72501379138186e-07, | |
| "loss": 0.2754, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 0.4503311258278146, | |
| "grad_norm": 0.582036554813385, | |
| "learning_rate": 6.705272198180312e-07, | |
| "loss": 0.2818, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 0.4515352197471403, | |
| "grad_norm": 0.7196856737136841, | |
| "learning_rate": 6.685500475846044e-07, | |
| "loss": 0.2744, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.45273931366646597, | |
| "grad_norm": 1.0595272779464722, | |
| "learning_rate": 6.665698973710288e-07, | |
| "loss": 0.2602, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 0.4539434075857917, | |
| "grad_norm": 0.4910378158092499, | |
| "learning_rate": 6.645868041630439e-07, | |
| "loss": 0.2887, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 0.4551475015051174, | |
| "grad_norm": 0.4395122230052948, | |
| "learning_rate": 6.626008029983867e-07, | |
| "loss": 0.2771, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 0.4563515954244431, | |
| "grad_norm": 0.5630185008049011, | |
| "learning_rate": 6.606119289661721e-07, | |
| "loss": 0.2976, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 0.45755568934376883, | |
| "grad_norm": 0.6062456965446472, | |
| "learning_rate": 6.58620217206274e-07, | |
| "loss": 0.2707, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.4587597832630945, | |
| "grad_norm": 0.6882142424583435, | |
| "learning_rate": 6.566257029087039e-07, | |
| "loss": 0.2732, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 0.4599638771824202, | |
| "grad_norm": 0.4631926417350769, | |
| "learning_rate": 6.546284213129885e-07, | |
| "loss": 0.2794, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 0.46116797110174595, | |
| "grad_norm": 0.4465793967247009, | |
| "learning_rate": 6.526284077075488e-07, | |
| "loss": 0.2809, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 0.46237206502107164, | |
| "grad_norm": 0.5073222517967224, | |
| "learning_rate": 6.506256974290747e-07, | |
| "loss": 0.2908, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 0.46357615894039733, | |
| "grad_norm": 0.5717306137084961, | |
| "learning_rate": 6.486203258619016e-07, | |
| "loss": 0.282, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.4647802528597231, | |
| "grad_norm": 0.5614638924598694, | |
| "learning_rate": 6.466123284373858e-07, | |
| "loss": 0.2764, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 0.46598434677904876, | |
| "grad_norm": 0.626006007194519, | |
| "learning_rate": 6.446017406332772e-07, | |
| "loss": 0.277, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 0.46718844069837445, | |
| "grad_norm": 0.47509709000587463, | |
| "learning_rate": 6.425885979730933e-07, | |
| "loss": 0.2828, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 0.4683925346177002, | |
| "grad_norm": 0.5545176267623901, | |
| "learning_rate": 6.405729360254914e-07, | |
| "loss": 0.2893, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 0.4695966285370259, | |
| "grad_norm": 0.4888879060745239, | |
| "learning_rate": 6.3855479040364e-07, | |
| "loss": 0.2811, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.4708007224563516, | |
| "grad_norm": 0.44063079357147217, | |
| "learning_rate": 6.365341967645902e-07, | |
| "loss": 0.2782, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 0.4720048163756773, | |
| "grad_norm": 0.5356207489967346, | |
| "learning_rate": 6.345111908086444e-07, | |
| "loss": 0.2658, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 0.473208910295003, | |
| "grad_norm": 0.5134460926055908, | |
| "learning_rate": 6.324858082787275e-07, | |
| "loss": 0.2782, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 0.4744130042143287, | |
| "grad_norm": 0.5685980916023254, | |
| "learning_rate": 6.304580849597527e-07, | |
| "loss": 0.2704, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 0.47561709813365444, | |
| "grad_norm": 0.8610411286354065, | |
| "learning_rate": 6.284280566779923e-07, | |
| "loss": 0.29, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 0.4768211920529801, | |
| "grad_norm": 0.5496920943260193, | |
| "learning_rate": 6.263957593004421e-07, | |
| "loss": 0.2704, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 0.4780252859723058, | |
| "grad_norm": 0.4593532383441925, | |
| "learning_rate": 6.243612287341896e-07, | |
| "loss": 0.2806, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 0.47922937989163156, | |
| "grad_norm": 0.5178139805793762, | |
| "learning_rate": 6.223245009257783e-07, | |
| "loss": 0.2683, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 0.48043347381095725, | |
| "grad_norm": 0.6350088119506836, | |
| "learning_rate": 6.20285611860573e-07, | |
| "loss": 0.2796, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 0.481637567730283, | |
| "grad_norm": 0.4848230183124542, | |
| "learning_rate": 6.182445975621246e-07, | |
| "loss": 0.2727, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.4828416616496087, | |
| "grad_norm": 0.6039783358573914, | |
| "learning_rate": 6.162014940915323e-07, | |
| "loss": 0.295, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 0.48404575556893437, | |
| "grad_norm": 0.5623034834861755, | |
| "learning_rate": 6.141563375468082e-07, | |
| "loss": 0.2843, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 0.4852498494882601, | |
| "grad_norm": 0.5298231244087219, | |
| "learning_rate": 6.12109164062238e-07, | |
| "loss": 0.2685, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 0.4864539434075858, | |
| "grad_norm": 0.49439486861228943, | |
| "learning_rate": 6.100600098077431e-07, | |
| "loss": 0.2588, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 0.4876580373269115, | |
| "grad_norm": 0.4667768180370331, | |
| "learning_rate": 6.080089109882418e-07, | |
| "loss": 0.275, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.48886213124623723, | |
| "grad_norm": 0.5490863919258118, | |
| "learning_rate": 6.059559038430094e-07, | |
| "loss": 0.2837, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 0.4900662251655629, | |
| "grad_norm": 0.467192143201828, | |
| "learning_rate": 6.039010246450376e-07, | |
| "loss": 0.2733, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 0.4912703190848886, | |
| "grad_norm": 0.49663642048835754, | |
| "learning_rate": 6.018443097003945e-07, | |
| "loss": 0.2738, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 0.49247441300421435, | |
| "grad_norm": 0.501777708530426, | |
| "learning_rate": 5.997857953475823e-07, | |
| "loss": 0.2743, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 0.49367850692354004, | |
| "grad_norm": 0.5064652562141418, | |
| "learning_rate": 5.977255179568955e-07, | |
| "loss": 0.2748, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.4948826008428657, | |
| "grad_norm": 0.6248656511306763, | |
| "learning_rate": 5.956635139297783e-07, | |
| "loss": 0.2765, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 0.49608669476219147, | |
| "grad_norm": 0.45688706636428833, | |
| "learning_rate": 5.935998196981817e-07, | |
| "loss": 0.271, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 0.49729078868151716, | |
| "grad_norm": 0.7225250601768494, | |
| "learning_rate": 5.915344717239197e-07, | |
| "loss": 0.2853, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 0.49849488260084285, | |
| "grad_norm": 0.5863081812858582, | |
| "learning_rate": 5.894675064980246e-07, | |
| "loss": 0.2685, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 0.4996989765201686, | |
| "grad_norm": 0.5770187973976135, | |
| "learning_rate": 5.87398960540103e-07, | |
| "loss": 0.2774, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 0.5009030704394943, | |
| "grad_norm": 0.41943806409835815, | |
| "learning_rate": 5.8532887039769e-07, | |
| "loss": 0.2622, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 0.50210716435882, | |
| "grad_norm": 0.6374907493591309, | |
| "learning_rate": 5.832572726456039e-07, | |
| "loss": 0.2858, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 0.5033112582781457, | |
| "grad_norm": 0.5210843086242676, | |
| "learning_rate": 5.811842038852996e-07, | |
| "loss": 0.2706, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 0.5045153521974715, | |
| "grad_norm": 0.596387505531311, | |
| "learning_rate": 5.791097007442222e-07, | |
| "loss": 0.2823, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 0.5057194461167971, | |
| "grad_norm": 0.6676878929138184, | |
| "learning_rate": 5.7703379987516e-07, | |
| "loss": 0.2848, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.5069235400361228, | |
| "grad_norm": 0.6097555160522461, | |
| "learning_rate": 5.749565379555961e-07, | |
| "loss": 0.2766, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 0.5081276339554486, | |
| "grad_norm": 0.6043739318847656, | |
| "learning_rate": 5.728779516870615e-07, | |
| "loss": 0.2885, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 0.5093317278747742, | |
| "grad_norm": 0.5565124750137329, | |
| "learning_rate": 5.707980777944859e-07, | |
| "loss": 0.2643, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 0.5105358217941, | |
| "grad_norm": 0.49649959802627563, | |
| "learning_rate": 5.687169530255487e-07, | |
| "loss": 0.2672, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 0.5117399157134257, | |
| "grad_norm": 0.49968451261520386, | |
| "learning_rate": 5.666346141500307e-07, | |
| "loss": 0.2754, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 0.5129440096327513, | |
| "grad_norm": 0.4982677698135376, | |
| "learning_rate": 5.645510979591634e-07, | |
| "loss": 0.2785, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 0.5141481035520771, | |
| "grad_norm": 0.904083251953125, | |
| "learning_rate": 5.624664412649797e-07, | |
| "loss": 0.2833, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 0.5153521974714028, | |
| "grad_norm": 0.5038682222366333, | |
| "learning_rate": 5.603806808996625e-07, | |
| "loss": 0.2746, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 0.5165562913907285, | |
| "grad_norm": 0.7115175724029541, | |
| "learning_rate": 5.58293853714895e-07, | |
| "loss": 0.2712, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 0.5177603853100542, | |
| "grad_norm": 0.5522176027297974, | |
| "learning_rate": 5.562059965812097e-07, | |
| "loss": 0.2869, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.5189644792293799, | |
| "grad_norm": 0.6081178784370422, | |
| "learning_rate": 5.541171463873357e-07, | |
| "loss": 0.2751, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 0.5201685731487056, | |
| "grad_norm": 0.5689599514007568, | |
| "learning_rate": 5.52027340039548e-07, | |
| "loss": 0.2875, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 0.5213726670680313, | |
| "grad_norm": 0.43370601534843445, | |
| "learning_rate": 5.499366144610153e-07, | |
| "loss": 0.2673, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 0.5225767609873571, | |
| "grad_norm": 0.5115625262260437, | |
| "learning_rate": 5.478450065911473e-07, | |
| "loss": 0.2791, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 0.5237808549066827, | |
| "grad_norm": 0.518798291683197, | |
| "learning_rate": 5.45752553384942e-07, | |
| "loss": 0.277, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 0.5249849488260084, | |
| "grad_norm": 0.5628324151039124, | |
| "learning_rate": 5.436592918123337e-07, | |
| "loss": 0.2884, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 0.5261890427453342, | |
| "grad_norm": 0.47458890080451965, | |
| "learning_rate": 5.415652588575385e-07, | |
| "loss": 0.27, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 0.5273931366646598, | |
| "grad_norm": 0.6163709759712219, | |
| "learning_rate": 5.394704915184014e-07, | |
| "loss": 0.2643, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 0.5285972305839856, | |
| "grad_norm": 0.44985631108283997, | |
| "learning_rate": 5.373750268057431e-07, | |
| "loss": 0.2774, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 0.5298013245033113, | |
| "grad_norm": 0.47572416067123413, | |
| "learning_rate": 5.352789017427052e-07, | |
| "loss": 0.278, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.5310054184226369, | |
| "grad_norm": 0.5311432480812073, | |
| "learning_rate": 5.33182153364097e-07, | |
| "loss": 0.283, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 0.5322095123419627, | |
| "grad_norm": 0.5810163617134094, | |
| "learning_rate": 5.310848187157403e-07, | |
| "loss": 0.257, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 0.5334136062612884, | |
| "grad_norm": 0.8989514708518982, | |
| "learning_rate": 5.289869348538153e-07, | |
| "loss": 0.2846, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 0.534617700180614, | |
| "grad_norm": 0.4534051716327667, | |
| "learning_rate": 5.26888538844206e-07, | |
| "loss": 0.2836, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 0.5358217940999398, | |
| "grad_norm": 0.4670819938182831, | |
| "learning_rate": 5.247896677618452e-07, | |
| "loss": 0.2614, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 0.5370258880192655, | |
| "grad_norm": 0.5935913324356079, | |
| "learning_rate": 5.226903586900587e-07, | |
| "loss": 0.2826, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 0.5382299819385912, | |
| "grad_norm": 0.45839351415634155, | |
| "learning_rate": 5.205906487199119e-07, | |
| "loss": 0.2514, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 0.5394340758579169, | |
| "grad_norm": 0.4929831624031067, | |
| "learning_rate": 5.184905749495525e-07, | |
| "loss": 0.2815, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 0.5406381697772427, | |
| "grad_norm": 0.529437780380249, | |
| "learning_rate": 5.163901744835564e-07, | |
| "loss": 0.2744, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 0.5418422636965683, | |
| "grad_norm": 0.44370970129966736, | |
| "learning_rate": 5.14289484432271e-07, | |
| "loss": 0.2837, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.543046357615894, | |
| "grad_norm": 0.46680358052253723, | |
| "learning_rate": 5.121885419111611e-07, | |
| "loss": 0.2833, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 0.5442504515352198, | |
| "grad_norm": 0.5581067204475403, | |
| "learning_rate": 5.100873840401513e-07, | |
| "loss": 0.2846, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 0.5454545454545454, | |
| "grad_norm": 0.4683559238910675, | |
| "learning_rate": 5.079860479429718e-07, | |
| "loss": 0.2666, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 0.5466586393738712, | |
| "grad_norm": 0.464067280292511, | |
| "learning_rate": 5.058845707465009e-07, | |
| "loss": 0.2693, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 0.5478627332931969, | |
| "grad_norm": 0.5715063214302063, | |
| "learning_rate": 5.037829895801106e-07, | |
| "loss": 0.2746, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 0.5490668272125225, | |
| "grad_norm": 0.585356593132019, | |
| "learning_rate": 5.016813415750097e-07, | |
| "loss": 0.281, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 0.5502709211318483, | |
| "grad_norm": 0.4893047511577606, | |
| "learning_rate": 4.995796638635875e-07, | |
| "loss": 0.2799, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 0.551475015051174, | |
| "grad_norm": 1.0689632892608643, | |
| "learning_rate": 4.974779935787589e-07, | |
| "loss": 0.2574, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 0.5526791089704997, | |
| "grad_norm": 0.6054455637931824, | |
| "learning_rate": 4.953763678533068e-07, | |
| "loss": 0.2635, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 0.5538832028898254, | |
| "grad_norm": 0.46325477957725525, | |
| "learning_rate": 4.932748238192273e-07, | |
| "loss": 0.2769, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.5550872968091511, | |
| "grad_norm": 0.5770764350891113, | |
| "learning_rate": 4.911733986070735e-07, | |
| "loss": 0.2671, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 0.5562913907284768, | |
| "grad_norm": 0.5715611577033997, | |
| "learning_rate": 4.890721293452979e-07, | |
| "loss": 0.2917, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 0.5574954846478025, | |
| "grad_norm": 0.5384266972541809, | |
| "learning_rate": 4.869710531595988e-07, | |
| "loss": 0.2771, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 0.5586995785671283, | |
| "grad_norm": 0.4611688256263733, | |
| "learning_rate": 4.848702071722629e-07, | |
| "loss": 0.2828, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 0.5599036724864539, | |
| "grad_norm": 0.6118834018707275, | |
| "learning_rate": 4.827696285015094e-07, | |
| "loss": 0.2832, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 0.5611077664057796, | |
| "grad_norm": 0.5026919841766357, | |
| "learning_rate": 4.806693542608348e-07, | |
| "loss": 0.2735, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 0.5623118603251054, | |
| "grad_norm": 0.548273503780365, | |
| "learning_rate": 4.785694215583566e-07, | |
| "loss": 0.2742, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 0.563515954244431, | |
| "grad_norm": 0.6186013221740723, | |
| "learning_rate": 4.764698674961581e-07, | |
| "loss": 0.2784, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 0.5647200481637568, | |
| "grad_norm": 0.45300328731536865, | |
| "learning_rate": 4.743707291696329e-07, | |
| "loss": 0.2786, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 0.5659241420830825, | |
| "grad_norm": 0.49064886569976807, | |
| "learning_rate": 4.7227204366682873e-07, | |
| "loss": 0.2747, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.5671282360024081, | |
| "grad_norm": 0.5186241865158081, | |
| "learning_rate": 4.7017384806779336e-07, | |
| "loss": 0.2788, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 0.5683323299217339, | |
| "grad_norm": 0.5284368395805359, | |
| "learning_rate": 4.6807617944391843e-07, | |
| "loss": 0.264, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 0.5695364238410596, | |
| "grad_norm": 0.5770208239555359, | |
| "learning_rate": 4.6597907485728477e-07, | |
| "loss": 0.2759, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 0.5707405177603853, | |
| "grad_norm": 0.5039085149765015, | |
| "learning_rate": 4.6388257136000807e-07, | |
| "loss": 0.2807, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 0.571944611679711, | |
| "grad_norm": 1.2547776699066162, | |
| "learning_rate": 4.617867059935838e-07, | |
| "loss": 0.2651, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 0.5731487055990367, | |
| "grad_norm": 0.5457895398139954, | |
| "learning_rate": 4.5969151578823224e-07, | |
| "loss": 0.27, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 0.5743527995183624, | |
| "grad_norm": 0.4974658787250519, | |
| "learning_rate": 4.5759703776224555e-07, | |
| "loss": 0.2794, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 0.5755568934376881, | |
| "grad_norm": 0.5161871314048767, | |
| "learning_rate": 4.555033089213321e-07, | |
| "loss": 0.2816, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 0.5767609873570139, | |
| "grad_norm": 0.43015995621681213, | |
| "learning_rate": 4.534103662579642e-07, | |
| "loss": 0.267, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 0.5779650812763396, | |
| "grad_norm": 0.4864785969257355, | |
| "learning_rate": 4.5131824675072364e-07, | |
| "loss": 0.2793, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.5791691751956652, | |
| "grad_norm": 0.6006112694740295, | |
| "learning_rate": 4.492269873636482e-07, | |
| "loss": 0.2689, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 0.580373269114991, | |
| "grad_norm": 0.4434204399585724, | |
| "learning_rate": 4.4713662504557927e-07, | |
| "loss": 0.2876, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 0.5815773630343167, | |
| "grad_norm": 0.565077543258667, | |
| "learning_rate": 4.450471967295083e-07, | |
| "loss": 0.2658, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 0.5827814569536424, | |
| "grad_norm": 0.5381281971931458, | |
| "learning_rate": 4.429587393319246e-07, | |
| "loss": 0.2715, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 0.5839855508729681, | |
| "grad_norm": 0.49021026492118835, | |
| "learning_rate": 4.408712897521633e-07, | |
| "loss": 0.2688, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 0.5851896447922939, | |
| "grad_norm": 0.5293102264404297, | |
| "learning_rate": 4.3878488487175323e-07, | |
| "loss": 0.2604, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 0.5863937387116195, | |
| "grad_norm": 0.6353856921195984, | |
| "learning_rate": 4.3669956155376476e-07, | |
| "loss": 0.2586, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 0.5875978326309452, | |
| "grad_norm": 0.5306446552276611, | |
| "learning_rate": 4.3461535664215923e-07, | |
| "loss": 0.2624, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 0.588801926550271, | |
| "grad_norm": 0.5957462191581726, | |
| "learning_rate": 4.325323069611383e-07, | |
| "loss": 0.2731, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 0.5900060204695966, | |
| "grad_norm": 0.6803829073905945, | |
| "learning_rate": 4.3045044931449156e-07, | |
| "loss": 0.2779, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.5912101143889223, | |
| "grad_norm": 0.5501326322555542, | |
| "learning_rate": 4.2836982048494854e-07, | |
| "loss": 0.2675, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 0.5924142083082481, | |
| "grad_norm": 0.49481987953186035, | |
| "learning_rate": 4.262904572335272e-07, | |
| "loss": 0.2725, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 0.5936183022275737, | |
| "grad_norm": 0.5254814028739929, | |
| "learning_rate": 4.242123962988851e-07, | |
| "loss": 0.2804, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 0.5948223961468995, | |
| "grad_norm": 0.5598310232162476, | |
| "learning_rate": 4.2213567439667037e-07, | |
| "loss": 0.2703, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 0.5960264900662252, | |
| "grad_norm": 0.5715354681015015, | |
| "learning_rate": 4.200603282188724e-07, | |
| "loss": 0.2799, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 0.5972305839855508, | |
| "grad_norm": 0.6474336981773376, | |
| "learning_rate": 4.179863944331743e-07, | |
| "loss": 0.2799, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 0.5984346779048766, | |
| "grad_norm": 0.47116249799728394, | |
| "learning_rate": 4.15913909682305e-07, | |
| "loss": 0.2751, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 0.5996387718242023, | |
| "grad_norm": 0.5750442147254944, | |
| "learning_rate": 4.138429105833906e-07, | |
| "loss": 0.2719, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 0.600842865743528, | |
| "grad_norm": 0.5243822932243347, | |
| "learning_rate": 4.1177343372730923e-07, | |
| "loss": 0.2709, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 0.6020469596628537, | |
| "grad_norm": 0.5334904789924622, | |
| "learning_rate": 4.097055156780437e-07, | |
| "loss": 0.272, | |
| "step": 5000 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 8305, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 1000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1967389652549632.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
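
For anyone inspecting this checkpoint, a minimal sketch of how the log above can be loaded and plotted. This is illustrative only: the filename `trainer_state.json` is the usual name the Hugging Face Trainer gives this file inside a checkpoint directory, and `matplotlib` is assumed to be available purely for the plot.

```python
import json

import matplotlib.pyplot as plt

# Illustrative sketch: load a Trainer checkpoint state and plot the logged
# training loss. The filename is assumed; the Hugging Face Trainer writes
# this file as trainer_state.json inside each checkpoint directory.
with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only entries that carry a training loss (eval entries, if present,
# use different keys such as "eval_loss").
train_logs = [e for e in state["log_history"] if "loss" in e]
steps = [e["step"] for e in train_logs]
losses = [e["loss"] for e in train_logs]

plt.plot(steps, losses)
plt.xlabel("step")
plt.ylabel("training loss")
plt.title(f"global_step={state['global_step']}, epoch={state['epoch']:.4f}")
plt.tight_layout()
plt.show()
```

With the state above, this yields the loss curve from step 10 through step 5000 (the warmup-era values near 0.69 down to the ~0.27 plateau); the same pattern extends to `learning_rate` or `grad_norm` by swapping the key.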