| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 1000.0, |
| "global_step": 1120, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0008928571428571428, |
| "grad_norm": 7.46875, |
| "learning_rate": 3.5714285714285716e-07, |
| "loss": 1.9798583984375, |
| "step": 1, |
| "token_acc": 0.5198793098029355 |
| }, |
| { |
| "epoch": 0.004464285714285714, |
| "grad_norm": 6.84375, |
| "learning_rate": 1.7857142857142859e-06, |
| "loss": 1.942692756652832, |
| "step": 5, |
| "token_acc": 0.5277546839276701 |
| }, |
| { |
| "epoch": 0.008928571428571428, |
| "grad_norm": 6.0625, |
| "learning_rate": 3.5714285714285718e-06, |
| "loss": 1.9170162200927734, |
| "step": 10, |
| "token_acc": 0.5350005891363262 |
| }, |
| { |
| "epoch": 0.013392857142857142, |
| "grad_norm": 4.46875, |
| "learning_rate": 5.357142857142857e-06, |
| "loss": 1.8599681854248047, |
| "step": 15, |
| "token_acc": 0.5440987793825112 |
| }, |
| { |
| "epoch": 0.017857142857142856, |
| "grad_norm": 3.375, |
| "learning_rate": 7.1428571428571436e-06, |
| "loss": 1.773095703125, |
| "step": 20, |
| "token_acc": 0.558397212543554 |
| }, |
| { |
| "epoch": 0.022321428571428572, |
| "grad_norm": 2.59375, |
| "learning_rate": 8.92857142857143e-06, |
| "loss": 1.7239681243896485, |
| "step": 25, |
| "token_acc": 0.564640616036101 |
| }, |
| { |
| "epoch": 0.026785714285714284, |
| "grad_norm": 2.203125, |
| "learning_rate": 1.0714285714285714e-05, |
| "loss": 1.662449264526367, |
| "step": 30, |
| "token_acc": 0.5744314346237109 |
| }, |
| { |
| "epoch": 0.03125, |
| "grad_norm": 1.8125, |
| "learning_rate": 1.25e-05, |
| "loss": 1.565970230102539, |
| "step": 35, |
| "token_acc": 0.5905128053468978 |
| }, |
| { |
| "epoch": 0.03571428571428571, |
| "grad_norm": 1.5390625, |
| "learning_rate": 1.4285714285714287e-05, |
| "loss": 1.4979233741760254, |
| "step": 40, |
| "token_acc": 0.6035646975846081 |
| }, |
| { |
| "epoch": 0.04017857142857143, |
| "grad_norm": 1.46875, |
| "learning_rate": 1.6071428571428572e-05, |
| "loss": 1.4658918380737305, |
| "step": 45, |
| "token_acc": 0.6079301236643054 |
| }, |
| { |
| "epoch": 0.044642857142857144, |
| "grad_norm": 1.4296875, |
| "learning_rate": 1.785714285714286e-05, |
| "loss": 1.422969436645508, |
| "step": 50, |
| "token_acc": 0.6154094170613891 |
| }, |
| { |
| "epoch": 0.049107142857142856, |
| "grad_norm": 1.4609375, |
| "learning_rate": 1.9642857142857145e-05, |
| "loss": 1.3724126815795898, |
| "step": 55, |
| "token_acc": 0.6238321824406662 |
| }, |
| { |
| "epoch": 0.05357142857142857, |
| "grad_norm": 1.4609375, |
| "learning_rate": 1.9999302568709548e-05, |
| "loss": 1.3787870407104492, |
| "step": 60, |
| "token_acc": 0.6205635902788131 |
| }, |
| { |
| "epoch": 0.05803571428571429, |
| "grad_norm": 1.4296875, |
| "learning_rate": 1.999646942081983e-05, |
| "loss": 1.3338838577270509, |
| "step": 65, |
| "token_acc": 0.628516364022137 |
| }, |
| { |
| "epoch": 0.0625, |
| "grad_norm": 1.390625, |
| "learning_rate": 1.999145758387301e-05, |
| "loss": 1.3212929725646974, |
| "step": 70, |
| "token_acc": 0.6295137500294056 |
| }, |
| { |
| "epoch": 0.06696428571428571, |
| "grad_norm": 1.4140625, |
| "learning_rate": 1.998426815017817e-05, |
| "loss": 1.290837574005127, |
| "step": 75, |
| "token_acc": 0.6369310469506665 |
| }, |
| { |
| "epoch": 0.07142857142857142, |
| "grad_norm": 1.3828125, |
| "learning_rate": 1.997490268664256e-05, |
| "loss": 1.2963342666625977, |
| "step": 80, |
| "token_acc": 0.6334583410737507 |
| }, |
| { |
| "epoch": 0.07589285714285714, |
| "grad_norm": 1.4453125, |
| "learning_rate": 1.996336323443013e-05, |
| "loss": 1.2906134605407715, |
| "step": 85, |
| "token_acc": 0.6352960586401749 |
| }, |
| { |
| "epoch": 0.08035714285714286, |
| "grad_norm": 1.390625, |
| "learning_rate": 1.9949652308516635e-05, |
| "loss": 1.2730415344238282, |
| "step": 90, |
| "token_acc": 0.6381611754746083 |
| }, |
| { |
| "epoch": 0.08482142857142858, |
| "grad_norm": 1.4140625, |
| "learning_rate": 1.9933772897141525e-05, |
| "loss": 1.2450992584228515, |
| "step": 95, |
| "token_acc": 0.6466158412384333 |
| }, |
| { |
| "epoch": 0.08928571428571429, |
| "grad_norm": 1.4140625, |
| "learning_rate": 1.991572846115666e-05, |
| "loss": 1.2471370697021484, |
| "step": 100, |
| "token_acc": 0.6422763263951449 |
| }, |
| { |
| "epoch": 0.09375, |
| "grad_norm": 1.3984375, |
| "learning_rate": 1.9895522933272028e-05, |
| "loss": 1.2441038131713866, |
| "step": 105, |
| "token_acc": 0.6425760561463307 |
| }, |
| { |
| "epoch": 0.09821428571428571, |
| "grad_norm": 1.4140625, |
| "learning_rate": 1.9873160717198655e-05, |
| "loss": 1.230722427368164, |
| "step": 110, |
| "token_acc": 0.6461212036192747 |
| }, |
| { |
| "epoch": 0.10267857142857142, |
| "grad_norm": 1.4375, |
| "learning_rate": 1.9848646686688798e-05, |
| "loss": 1.2274154663085937, |
| "step": 115, |
| "token_acc": 0.645449105054132 |
| }, |
| { |
| "epoch": 0.10714285714285714, |
| "grad_norm": 1.421875, |
| "learning_rate": 1.9821986184473757e-05, |
| "loss": 1.2445575714111328, |
| "step": 120, |
| "token_acc": 0.640380009025793 |
| }, |
| { |
| "epoch": 0.11160714285714286, |
| "grad_norm": 1.375, |
| "learning_rate": 1.9793185021099426e-05, |
| "loss": 1.2152713775634765, |
| "step": 125, |
| "token_acc": 0.650086875941156 |
| }, |
| { |
| "epoch": 0.11607142857142858, |
| "grad_norm": 1.421875, |
| "learning_rate": 1.9762249473659936e-05, |
| "loss": 1.2223292350769044, |
| "step": 130, |
| "token_acc": 0.646190244076421 |
| }, |
| { |
| "epoch": 0.12053571428571429, |
| "grad_norm": 1.34375, |
| "learning_rate": 1.9729186284429567e-05, |
| "loss": 1.20882568359375, |
| "step": 135, |
| "token_acc": 0.6505111612683495 |
| }, |
| { |
| "epoch": 0.125, |
| "grad_norm": 1.4765625, |
| "learning_rate": 1.9694002659393306e-05, |
| "loss": 1.198903465270996, |
| "step": 140, |
| "token_acc": 0.6506264468770722 |
| }, |
| { |
| "epoch": 0.12946428571428573, |
| "grad_norm": 1.453125, |
| "learning_rate": 1.965670626667633e-05, |
| "loss": 1.18528413772583, |
| "step": 145, |
| "token_acc": 0.6555728824688115 |
| }, |
| { |
| "epoch": 0.13392857142857142, |
| "grad_norm": 1.46875, |
| "learning_rate": 1.9617305234872773e-05, |
| "loss": 1.2123258590698243, |
| "step": 150, |
| "token_acc": 0.648010957945968 |
| }, |
| { |
| "epoch": 0.13839285714285715, |
| "grad_norm": 1.3984375, |
| "learning_rate": 1.9575808151274133e-05, |
| "loss": 1.20123291015625, |
| "step": 155, |
| "token_acc": 0.6520161821001254 |
| }, |
| { |
| "epoch": 0.14285714285714285, |
| "grad_norm": 1.453125, |
| "learning_rate": 1.9532224059997693e-05, |
| "loss": 1.2176162719726562, |
| "step": 160, |
| "token_acc": 0.6464380895343614 |
| }, |
| { |
| "epoch": 0.14732142857142858, |
| "grad_norm": 1.375, |
| "learning_rate": 1.948656246001542e-05, |
| "loss": 1.1905139923095702, |
| "step": 165, |
| "token_acc": 0.6515007571439264 |
| }, |
| { |
| "epoch": 0.15178571428571427, |
| "grad_norm": 1.34375, |
| "learning_rate": 1.9438833303083677e-05, |
| "loss": 1.2004039764404297, |
| "step": 170, |
| "token_acc": 0.650028138976926 |
| }, |
| { |
| "epoch": 0.15625, |
| "grad_norm": 1.3515625, |
| "learning_rate": 1.9389046991574298e-05, |
| "loss": 1.1866092681884766, |
| "step": 175, |
| "token_acc": 0.6536794480531047 |
| }, |
| { |
| "epoch": 0.16071428571428573, |
| "grad_norm": 1.359375, |
| "learning_rate": 1.9337214376207417e-05, |
| "loss": 1.1794092178344726, |
| "step": 180, |
| "token_acc": 0.6547250738292786 |
| }, |
| { |
| "epoch": 0.16517857142857142, |
| "grad_norm": 1.3515625, |
| "learning_rate": 1.9283346753686625e-05, |
| "loss": 1.1931296348571778, |
| "step": 185, |
| "token_acc": 0.6512469949273786 |
| }, |
| { |
| "epoch": 0.16964285714285715, |
| "grad_norm": 1.4375, |
| "learning_rate": 1.922745586423687e-05, |
| "loss": 1.175852394104004, |
| "step": 190, |
| "token_acc": 0.6550968319583254 |
| }, |
| { |
| "epoch": 0.17410714285714285, |
| "grad_norm": 1.359375, |
| "learning_rate": 1.9169553889045732e-05, |
| "loss": 1.1729495048522949, |
| "step": 195, |
| "token_acc": 0.6564944949906185 |
| }, |
| { |
| "epoch": 0.17857142857142858, |
| "grad_norm": 1.296875, |
| "learning_rate": 1.9109653447608607e-05, |
| "loss": 1.1875883102416993, |
| "step": 200, |
| "token_acc": 0.6519358095416979 |
| }, |
| { |
| "epoch": 0.18303571428571427, |
| "grad_norm": 1.453125, |
| "learning_rate": 1.9047767594978308e-05, |
| "loss": 1.1628761291503906, |
| "step": 205, |
| "token_acc": 0.6568504306122326 |
| }, |
| { |
| "epoch": 0.1875, |
| "grad_norm": 1.4375, |
| "learning_rate": 1.898390981891979e-05, |
| "loss": 1.1821978569030762, |
| "step": 210, |
| "token_acc": 0.6514381801603373 |
| }, |
| { |
| "epoch": 0.19196428571428573, |
| "grad_norm": 1.3671875, |
| "learning_rate": 1.891809403697054e-05, |
| "loss": 1.1805411338806153, |
| "step": 215, |
| "token_acc": 0.6520954938127481 |
| }, |
| { |
| "epoch": 0.19642857142857142, |
| "grad_norm": 1.3828125, |
| "learning_rate": 1.885033459340731e-05, |
| "loss": 1.1788909912109375, |
| "step": 220, |
| "token_acc": 0.6525619317127482 |
| }, |
| { |
| "epoch": 0.20089285714285715, |
| "grad_norm": 1.3359375, |
| "learning_rate": 1.8780646256119843e-05, |
| "loss": 1.1617810249328613, |
| "step": 225, |
| "token_acc": 0.6574105324410824 |
| }, |
| { |
| "epoch": 0.20535714285714285, |
| "grad_norm": 1.4453125, |
| "learning_rate": 1.8709044213392265e-05, |
| "loss": 1.1675668716430665, |
| "step": 230, |
| "token_acc": 0.6567929066970355 |
| }, |
| { |
| "epoch": 0.20982142857142858, |
| "grad_norm": 1.3671875, |
| "learning_rate": 1.8635544070592876e-05, |
| "loss": 1.1429882049560547, |
| "step": 235, |
| "token_acc": 0.6626304648850445 |
| }, |
| { |
| "epoch": 0.21428571428571427, |
| "grad_norm": 1.46875, |
| "learning_rate": 1.8560161846773002e-05, |
| "loss": 1.1523635864257813, |
| "step": 240, |
| "token_acc": 0.6588007349650185 |
| }, |
| { |
| "epoch": 0.21875, |
| "grad_norm": 1.3828125, |
| "learning_rate": 1.8482913971175737e-05, |
| "loss": 1.1638723373413087, |
| "step": 245, |
| "token_acc": 0.6575461339567173 |
| }, |
| { |
| "epoch": 0.22321428571428573, |
| "grad_norm": 1.3828125, |
| "learning_rate": 1.8403817279655237e-05, |
| "loss": 1.1588207244873048, |
| "step": 250, |
| "token_acc": 0.6566742318540298 |
| }, |
| { |
| "epoch": 0.22767857142857142, |
| "grad_norm": 1.375, |
| "learning_rate": 1.8322889011007424e-05, |
| "loss": 1.1697870254516602, |
| "step": 255, |
| "token_acc": 0.6530151356474628 |
| }, |
| { |
| "epoch": 0.23214285714285715, |
| "grad_norm": 1.375, |
| "learning_rate": 1.8240146803212854e-05, |
| "loss": 1.163081169128418, |
| "step": 260, |
| "token_acc": 0.654347747769385 |
| }, |
| { |
| "epoch": 0.23660714285714285, |
| "grad_norm": 1.3515625, |
| "learning_rate": 1.8155608689592604e-05, |
| "loss": 1.1553804397583007, |
| "step": 265, |
| "token_acc": 0.656771626981819 |
| }, |
| { |
| "epoch": 0.24107142857142858, |
| "grad_norm": 1.3984375, |
| "learning_rate": 1.8069293094877974e-05, |
| "loss": 1.1408929824829102, |
| "step": 270, |
| "token_acc": 0.6625453583525501 |
| }, |
| { |
| "epoch": 0.24553571428571427, |
| "grad_norm": 1.34375, |
| "learning_rate": 1.7981218831194904e-05, |
| "loss": 1.1588726043701172, |
| "step": 275, |
| "token_acc": 0.6563144046706936 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 1.3671875, |
| "learning_rate": 1.789140509396394e-05, |
| "loss": 1.1532201766967773, |
| "step": 280, |
| "token_acc": 0.6581244540600577 |
| }, |
| { |
| "epoch": 0.2544642857142857, |
| "grad_norm": 1.3515625, |
| "learning_rate": 1.7799871457716665e-05, |
| "loss": 1.1504724502563477, |
| "step": 285, |
| "token_acc": 0.6587934607950444 |
| }, |
| { |
| "epoch": 0.25892857142857145, |
| "grad_norm": 1.359375, |
| "learning_rate": 1.770663787182954e-05, |
| "loss": 1.1456276893615722, |
| "step": 290, |
| "token_acc": 0.6618624162066538 |
| }, |
| { |
| "epoch": 0.26339285714285715, |
| "grad_norm": 1.3125, |
| "learning_rate": 1.7611724656175982e-05, |
| "loss": 1.1442519187927247, |
| "step": 295, |
| "token_acc": 0.6591544798140778 |
| }, |
| { |
| "epoch": 0.26785714285714285, |
| "grad_norm": 1.375, |
| "learning_rate": 1.7515152496697765e-05, |
| "loss": 1.152108383178711, |
| "step": 300, |
| "token_acc": 0.6586486613213298 |
| }, |
| { |
| "epoch": 0.27232142857142855, |
| "grad_norm": 1.3515625, |
| "learning_rate": 1.7416942440896577e-05, |
| "loss": 1.1452009201049804, |
| "step": 305, |
| "token_acc": 0.6591358039294544 |
| }, |
| { |
| "epoch": 0.2767857142857143, |
| "grad_norm": 1.34375, |
| "learning_rate": 1.7317115893246833e-05, |
| "loss": 1.1422765731811524, |
| "step": 310, |
| "token_acc": 0.6589653054958551 |
| }, |
| { |
| "epoch": 0.28125, |
| "grad_norm": 1.3515625, |
| "learning_rate": 1.7215694610530624e-05, |
| "loss": 1.1257204055786132, |
| "step": 315, |
| "token_acc": 0.6644322342183476 |
| }, |
| { |
| "epoch": 0.2857142857142857, |
| "grad_norm": 1.3984375, |
| "learning_rate": 1.7112700697095955e-05, |
| "loss": 1.150202178955078, |
| "step": 320, |
| "token_acc": 0.657143724020753 |
| }, |
| { |
| "epoch": 0.29017857142857145, |
| "grad_norm": 1.265625, |
| "learning_rate": 1.7008156600039157e-05, |
| "loss": 1.1264986038208007, |
| "step": 325, |
| "token_acc": 0.6647021643690532 |
| }, |
| { |
| "epoch": 0.29464285714285715, |
| "grad_norm": 1.390625, |
| "learning_rate": 1.690208510431267e-05, |
| "loss": 1.1261561393737793, |
| "step": 330, |
| "token_acc": 0.6645312870725624 |
| }, |
| { |
| "epoch": 0.29910714285714285, |
| "grad_norm": 1.421875, |
| "learning_rate": 1.6794509327759132e-05, |
| "loss": 1.1390050888061523, |
| "step": 335, |
| "token_acc": 0.6606452523992492 |
| }, |
| { |
| "epoch": 0.30357142857142855, |
| "grad_norm": 1.34375, |
| "learning_rate": 1.6685452716072946e-05, |
| "loss": 1.128352451324463, |
| "step": 340, |
| "token_acc": 0.6637220739566702 |
| }, |
| { |
| "epoch": 0.3080357142857143, |
| "grad_norm": 1.359375, |
| "learning_rate": 1.6574939037690394e-05, |
| "loss": 1.1525999069213868, |
| "step": 345, |
| "token_acc": 0.6566719096261906 |
| }, |
| { |
| "epoch": 0.3125, |
| "grad_norm": 1.3671875, |
| "learning_rate": 1.646299237860941e-05, |
| "loss": 1.1295086860656738, |
| "step": 350, |
| "token_acc": 0.6623984060840936 |
| }, |
| { |
| "epoch": 0.3169642857142857, |
| "grad_norm": 1.375, |
| "learning_rate": 1.634963713714012e-05, |
| "loss": 1.1322021484375, |
| "step": 355, |
| "token_acc": 0.6617089233387465 |
| }, |
| { |
| "epoch": 0.32142857142857145, |
| "grad_norm": 1.390625, |
| "learning_rate": 1.6234898018587336e-05, |
| "loss": 1.1252225875854491, |
| "step": 360, |
| "token_acc": 0.6641228446029669 |
| }, |
| { |
| "epoch": 0.32589285714285715, |
| "grad_norm": 1.328125, |
| "learning_rate": 1.6118800029866157e-05, |
| "loss": 1.1185049057006835, |
| "step": 365, |
| "token_acc": 0.6656541637014795 |
| }, |
| { |
| "epoch": 0.33035714285714285, |
| "grad_norm": 1.3515625, |
| "learning_rate": 1.600136847405179e-05, |
| "loss": 1.1188979148864746, |
| "step": 370, |
| "token_acc": 0.6649322180559561 |
| }, |
| { |
| "epoch": 0.33482142857142855, |
| "grad_norm": 1.7421875, |
| "learning_rate": 1.5882628944864862e-05, |
| "loss": 1.1338905334472655, |
| "step": 375, |
| "token_acc": 0.6627974764645106 |
| }, |
| { |
| "epoch": 0.3392857142857143, |
| "grad_norm": 1.328125, |
| "learning_rate": 1.5762607321093368e-05, |
| "loss": 1.1297473907470703, |
| "step": 380, |
| "token_acc": 0.662811429370499 |
| }, |
| { |
| "epoch": 0.34375, |
| "grad_norm": 1.3359375, |
| "learning_rate": 1.5641329760952514e-05, |
| "loss": 1.1086080551147461, |
| "step": 385, |
| "token_acc": 0.6686927987727868 |
| }, |
| { |
| "epoch": 0.3482142857142857, |
| "grad_norm": 1.3203125, |
| "learning_rate": 1.5518822696383612e-05, |
| "loss": 1.1249174118041991, |
| "step": 390, |
| "token_acc": 0.6625904370106592 |
| }, |
| { |
| "epoch": 0.35267857142857145, |
| "grad_norm": 1.3203125, |
| "learning_rate": 1.539511282729338e-05, |
| "loss": 1.1013822555541992, |
| "step": 395, |
| "token_acc": 0.670699511008058 |
| }, |
| { |
| "epoch": 0.35714285714285715, |
| "grad_norm": 1.3515625, |
| "learning_rate": 1.527022711573479e-05, |
| "loss": 1.1248649597167968, |
| "step": 400, |
| "token_acc": 0.6626294342018012 |
| }, |
| { |
| "epoch": 0.36160714285714285, |
| "grad_norm": 1.28125, |
| "learning_rate": 1.51441927800308e-05, |
| "loss": 1.119422721862793, |
| "step": 405, |
| "token_acc": 0.6646745395834887 |
| }, |
| { |
| "epoch": 0.36607142857142855, |
| "grad_norm": 1.359375, |
| "learning_rate": 1.5017037288842238e-05, |
| "loss": 1.1286213874816895, |
| "step": 410, |
| "token_acc": 0.6624454043387759 |
| }, |
| { |
| "epoch": 0.3705357142857143, |
| "grad_norm": 1.375, |
| "learning_rate": 1.4888788355181128e-05, |
| "loss": 1.1240810394287108, |
| "step": 415, |
| "token_acc": 0.661629880136473 |
| }, |
| { |
| "epoch": 0.375, |
| "grad_norm": 1.296875, |
| "learning_rate": 1.4759473930370738e-05, |
| "loss": 1.122232151031494, |
| "step": 420, |
| "token_acc": 0.6647913929040736 |
| }, |
| { |
| "epoch": 0.3794642857142857, |
| "grad_norm": 1.3359375, |
| "learning_rate": 1.4629122197953716e-05, |
| "loss": 1.127860927581787, |
| "step": 425, |
| "token_acc": 0.6610134748138546 |
| }, |
| { |
| "epoch": 0.38392857142857145, |
| "grad_norm": 1.328125, |
| "learning_rate": 1.4497761567549602e-05, |
| "loss": 1.1123634338378907, |
| "step": 430, |
| "token_acc": 0.6672016743978223 |
| }, |
| { |
| "epoch": 0.38839285714285715, |
| "grad_norm": 1.3046875, |
| "learning_rate": 1.4365420668663075e-05, |
| "loss": 1.1055331230163574, |
| "step": 435, |
| "token_acc": 0.667418617766589 |
| }, |
| { |
| "epoch": 0.39285714285714285, |
| "grad_norm": 1.4140625, |
| "learning_rate": 1.4232128344444251e-05, |
| "loss": 1.1390070915222168, |
| "step": 440, |
| "token_acc": 0.6599930821407843 |
| }, |
| { |
| "epoch": 0.39732142857142855, |
| "grad_norm": 1.3359375, |
| "learning_rate": 1.4097913645402463e-05, |
| "loss": 1.119845199584961, |
| "step": 445, |
| "token_acc": 0.6643133551388953 |
| }, |
| { |
| "epoch": 0.4017857142857143, |
| "grad_norm": 1.3359375, |
| "learning_rate": 1.396280582307481e-05, |
| "loss": 1.1190576553344727, |
| "step": 450, |
| "token_acc": 0.6644705200637404 |
| }, |
| { |
| "epoch": 0.40625, |
| "grad_norm": 1.265625, |
| "learning_rate": 1.3826834323650899e-05, |
| "loss": 1.114608383178711, |
| "step": 455, |
| "token_acc": 0.6656076250992852 |
| }, |
| { |
| "epoch": 0.4107142857142857, |
| "grad_norm": 1.375, |
| "learning_rate": 1.369002878155519e-05, |
| "loss": 1.122308349609375, |
| "step": 460, |
| "token_acc": 0.6635312122906672 |
| }, |
| { |
| "epoch": 0.41517857142857145, |
| "grad_norm": 1.34375, |
| "learning_rate": 1.3552419012988284e-05, |
| "loss": 1.112066650390625, |
| "step": 465, |
| "token_acc": 0.6657017205282056 |
| }, |
| { |
| "epoch": 0.41964285714285715, |
| "grad_norm": 1.296875, |
| "learning_rate": 1.3414035009428598e-05, |
| "loss": 1.1216192245483398, |
| "step": 470, |
| "token_acc": 0.663814267923857 |
| }, |
| { |
| "epoch": 0.42410714285714285, |
| "grad_norm": 1.34375, |
| "learning_rate": 1.3274906931095863e-05, |
| "loss": 1.1180400848388672, |
| "step": 475, |
| "token_acc": 0.6634265241395513 |
| }, |
| { |
| "epoch": 0.42857142857142855, |
| "grad_norm": 1.2890625, |
| "learning_rate": 1.3135065100377816e-05, |
| "loss": 1.1156521797180177, |
| "step": 480, |
| "token_acc": 0.664764806569064 |
| }, |
| { |
| "epoch": 0.4330357142857143, |
| "grad_norm": 1.3125, |
| "learning_rate": 1.2994539995221564e-05, |
| "loss": 1.115281867980957, |
| "step": 485, |
| "token_acc": 0.6655095086932463 |
| }, |
| { |
| "epoch": 0.4375, |
| "grad_norm": 1.3515625, |
| "learning_rate": 1.2853362242491054e-05, |
| "loss": 1.1114022254943847, |
| "step": 490, |
| "token_acc": 0.6649909487491468 |
| }, |
| { |
| "epoch": 0.4419642857142857, |
| "grad_norm": 1.2421875, |
| "learning_rate": 1.2711562611292063e-05, |
| "loss": 1.1045937538146973, |
| "step": 495, |
| "token_acc": 0.6681451286030272 |
| }, |
| { |
| "epoch": 0.44642857142857145, |
| "grad_norm": 1.3828125, |
| "learning_rate": 1.2569172006266192e-05, |
| "loss": 1.1057794570922852, |
| "step": 500, |
| "token_acc": 0.6668334494856537 |
| }, |
| { |
| "epoch": 0.45089285714285715, |
| "grad_norm": 1.3359375, |
| "learning_rate": 1.2426221460855352e-05, |
| "loss": 1.1071063041687013, |
| "step": 505, |
| "token_acc": 0.6664125673607487 |
| }, |
| { |
| "epoch": 0.45535714285714285, |
| "grad_norm": 1.265625, |
| "learning_rate": 1.2282742130538121e-05, |
| "loss": 1.0995834350585938, |
| "step": 510, |
| "token_acc": 0.6681870274068059 |
| }, |
| { |
| "epoch": 0.45982142857142855, |
| "grad_norm": 1.34375, |
| "learning_rate": 1.2138765286039573e-05, |
| "loss": 1.1039738655090332, |
| "step": 515, |
| "token_acc": 0.668202407915127 |
| }, |
| { |
| "epoch": 0.4642857142857143, |
| "grad_norm": 1.2890625, |
| "learning_rate": 1.1994322306515926e-05, |
| "loss": 1.11993465423584, |
| "step": 520, |
| "token_acc": 0.6619411576211444 |
| }, |
| { |
| "epoch": 0.46875, |
| "grad_norm": 1.3046875, |
| "learning_rate": 1.1849444672715587e-05, |
| "loss": 1.0845392227172852, |
| "step": 525, |
| "token_acc": 0.6731254973876336 |
| }, |
| { |
| "epoch": 0.4732142857142857, |
| "grad_norm": 1.2109375, |
| "learning_rate": 1.1704163960118069e-05, |
| "loss": 1.0899698257446289, |
| "step": 530, |
| "token_acc": 0.6714283226626576 |
| }, |
| { |
| "epoch": 0.47767857142857145, |
| "grad_norm": 1.3828125, |
| "learning_rate": 1.155851183205224e-05, |
| "loss": 1.1180584907531739, |
| "step": 535, |
| "token_acc": 0.6640847703163699 |
| }, |
| { |
| "epoch": 0.48214285714285715, |
| "grad_norm": 1.265625, |
| "learning_rate": 1.141252003279542e-05, |
| "loss": 1.0958803176879883, |
| "step": 540, |
| "token_acc": 0.6683001406764094 |
| }, |
| { |
| "epoch": 0.48660714285714285, |
| "grad_norm": 1.25, |
| "learning_rate": 1.1266220380654862e-05, |
| "loss": 1.0876192092895507, |
| "step": 545, |
| "token_acc": 0.674294498100293 |
| }, |
| { |
| "epoch": 0.49107142857142855, |
| "grad_norm": 1.3203125, |
| "learning_rate": 1.1119644761033079e-05, |
| "loss": 1.1089275360107422, |
| "step": 550, |
| "token_acc": 0.6656800260519644 |
| }, |
| { |
| "epoch": 0.4955357142857143, |
| "grad_norm": 1.3203125, |
| "learning_rate": 1.097282511947855e-05, |
| "loss": 1.105890655517578, |
| "step": 555, |
| "token_acc": 0.6669049856720307 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.234375, |
| "learning_rate": 1.0825793454723325e-05, |
| "loss": 1.0942912101745605, |
| "step": 560, |
| "token_acc": 0.6705247046279258 |
| }, |
| { |
| "epoch": 0.5044642857142857, |
| "grad_norm": 1.3046875, |
| "learning_rate": 1.0678581811709025e-05, |
| "loss": 1.0961302757263183, |
| "step": 565, |
| "token_acc": 0.6700240310483472 |
| }, |
| { |
| "epoch": 0.5089285714285714, |
| "grad_norm": 1.3359375, |
| "learning_rate": 1.0531222274602795e-05, |
| "loss": 1.0944637298583983, |
| "step": 570, |
| "token_acc": 0.6694562628384824 |
| }, |
| { |
| "epoch": 0.5133928571428571, |
| "grad_norm": 1.2578125, |
| "learning_rate": 1.0383746959804672e-05, |
| "loss": 1.0805794715881347, |
| "step": 575, |
| "token_acc": 0.6751718465425063 |
| }, |
| { |
| "epoch": 0.5178571428571429, |
| "grad_norm": 1.2578125, |
| "learning_rate": 1.023618800894798e-05, |
| "loss": 1.0854421615600587, |
| "step": 580, |
| "token_acc": 0.6732954215748854 |
| }, |
| { |
| "epoch": 0.5223214285714286, |
| "grad_norm": 1.2890625, |
| "learning_rate": 1.0088577581894154e-05, |
| "loss": 1.0917674064636231, |
| "step": 585, |
| "token_acc": 0.670189745515675 |
| }, |
| { |
| "epoch": 0.5267857142857143, |
| "grad_norm": 1.3203125, |
| "learning_rate": 9.94094784972367e-06, |
| "loss": 1.0841856002807617, |
| "step": 590, |
| "token_acc": 0.6734028220945898 |
| }, |
| { |
| "epoch": 0.53125, |
| "grad_norm": 1.296875, |
| "learning_rate": 9.79333098772446e-06, |
| "loss": 1.1098053932189942, |
| "step": 595, |
| "token_acc": 0.6651632518802563 |
| }, |
| { |
| "epoch": 0.5357142857142857, |
| "grad_norm": 1.2421875, |
| "learning_rate": 9.645759168379463e-06, |
| "loss": 1.1012333869934081, |
| "step": 600, |
| "token_acc": 0.667337838941999 |
| }, |
| { |
| "epoch": 0.5401785714285714, |
| "grad_norm": 1.3125, |
| "learning_rate": 9.498264554354761e-06, |
| "loss": 1.0885583877563476, |
| "step": 605, |
| "token_acc": 0.6714689082367089 |
| }, |
| { |
| "epoch": 0.5446428571428571, |
| "grad_norm": 1.3046875, |
| "learning_rate": 9.350879291489848e-06, |
| "loss": 1.104905128479004, |
| "step": 610, |
| "token_acc": 0.6663167322052396 |
| }, |
| { |
| "epoch": 0.5491071428571429, |
| "grad_norm": 1.2734375, |
| "learning_rate": 9.203635501791595e-06, |
| "loss": 1.1066022872924806, |
| "step": 615, |
| "token_acc": 0.6652294646322893 |
| }, |
| { |
| "epoch": 0.5535714285714286, |
| "grad_norm": 1.2890625, |
| "learning_rate": 9.056565276433378e-06, |
| "loss": 1.1055733680725097, |
| "step": 620, |
| "token_acc": 0.6665237252904985 |
| }, |
| { |
| "epoch": 0.5580357142857143, |
| "grad_norm": 1.3515625, |
| "learning_rate": 8.909700668760945e-06, |
| "loss": 1.1153239250183105, |
| "step": 625, |
| "token_acc": 0.6633878551716786 |
| }, |
| { |
| "epoch": 0.5625, |
| "grad_norm": 1.265625, |
| "learning_rate": 8.763073687306523e-06, |
| "loss": 1.1114237785339356, |
| "step": 630, |
| "token_acc": 0.665496981051093 |
| }, |
| { |
| "epoch": 0.5669642857142857, |
| "grad_norm": 1.3203125, |
| "learning_rate": 8.616716288812694e-06, |
| "loss": 1.0952648162841796, |
| "step": 635, |
| "token_acc": 0.6696672683549216 |
| }, |
| { |
| "epoch": 0.5714285714285714, |
| "grad_norm": 1.28125, |
| "learning_rate": 8.47066037126754e-06, |
| "loss": 1.1008512496948242, |
| "step": 640, |
| "token_acc": 0.6676704190666269 |
| }, |
| { |
| "epoch": 0.5758928571428571, |
| "grad_norm": 1.3515625, |
| "learning_rate": 8.324937766952638e-06, |
| "loss": 1.1116990089416503, |
| "step": 645, |
| "token_acc": 0.6651052619143255 |
| }, |
| { |
| "epoch": 0.5803571428571429, |
| "grad_norm": 1.3046875, |
| "learning_rate": 8.17958023550531e-06, |
| "loss": 1.1096595764160155, |
| "step": 650, |
| "token_acc": 0.6650905800358365 |
| }, |
| { |
| "epoch": 0.5848214285714286, |
| "grad_norm": 1.2890625, |
| "learning_rate": 8.03461945699677e-06, |
| "loss": 1.0997350692749024, |
| "step": 655, |
| "token_acc": 0.66805490553854 |
| }, |
| { |
| "epoch": 0.5892857142857143, |
| "grad_norm": 1.2265625, |
| "learning_rate": 7.89008702502758e-06, |
| "loss": 1.1013197898864746, |
| "step": 660, |
| "token_acc": 0.6695434588695944 |
| }, |
| { |
| "epoch": 0.59375, |
| "grad_norm": 1.3359375, |
| "learning_rate": 7.746014439841941e-06, |
| "loss": 1.1132354736328125, |
| "step": 665, |
| "token_acc": 0.6643929538342533 |
| }, |
| { |
| "epoch": 0.5982142857142857, |
| "grad_norm": 1.265625, |
| "learning_rate": 7.602433101462351e-06, |
| "loss": 1.1085187911987304, |
| "step": 670, |
| "token_acc": 0.6662411498410222 |
| }, |
| { |
| "epoch": 0.6026785714285714, |
| "grad_norm": 1.328125, |
| "learning_rate": 7.459374302846114e-06, |
| "loss": 1.0951228141784668, |
| "step": 675, |
| "token_acc": 0.6677065047207857 |
| }, |
| { |
| "epoch": 0.6071428571428571, |
| "grad_norm": 1.375, |
| "learning_rate": 7.316869223065156e-06, |
| "loss": 1.114619827270508, |
| "step": 680, |
| "token_acc": 0.6641058671975641 |
| }, |
| { |
| "epoch": 0.6116071428571429, |
| "grad_norm": 1.296875, |
| "learning_rate": 7.174948920510675e-06, |
| "loss": 1.0971644401550293, |
| "step": 685, |
| "token_acc": 0.6688319741107969 |
| }, |
| { |
| "epoch": 0.6160714285714286, |
| "grad_norm": 1.296875, |
| "learning_rate": 7.033644326124104e-06, |
| "loss": 1.104668140411377, |
| "step": 690, |
| "token_acc": 0.6667567008413406 |
| }, |
| { |
| "epoch": 0.6205357142857143, |
| "grad_norm": 1.3359375, |
| "learning_rate": 6.892986236655827e-06, |
| "loss": 1.114396095275879, |
| "step": 695, |
| "token_acc": 0.6627671848794471 |
| }, |
| { |
| "epoch": 0.625, |
| "grad_norm": 1.3046875, |
| "learning_rate": 6.7530053079531664e-06, |
| "loss": 1.098177146911621, |
| "step": 700, |
| "token_acc": 0.668855782018009 |
| }, |
| { |
| "epoch": 0.6294642857142857, |
| "grad_norm": 1.265625, |
| "learning_rate": 6.613732048279064e-06, |
| "loss": 1.0986656188964843, |
| "step": 705, |
| "token_acc": 0.6675273764392436 |
| }, |
| { |
| "epoch": 0.6339285714285714, |
| "grad_norm": 1.2890625, |
| "learning_rate": 6.475196811662929e-06, |
| "loss": 1.0969505310058594, |
| "step": 710, |
| "token_acc": 0.6685544895216992 |
| }, |
| { |
| "epoch": 0.6383928571428571, |
| "grad_norm": 1.265625, |
| "learning_rate": 6.337429791285107e-06, |
| "loss": 1.0815807342529298, |
| "step": 715, |
| "token_acc": 0.6731986752641865 |
| }, |
| { |
| "epoch": 0.6428571428571429, |
| "grad_norm": 1.265625, |
| "learning_rate": 6.200461012896401e-06, |
| "loss": 1.0996244430541993, |
| "step": 720, |
| "token_acc": 0.66839132010241 |
| }, |
| { |
| "epoch": 0.6473214285714286, |
| "grad_norm": 1.3125, |
| "learning_rate": 6.064320328274079e-06, |
| "loss": 1.0926881790161134, |
| "step": 725, |
| "token_acc": 0.6687555529230587 |
| }, |
| { |
| "epoch": 0.6517857142857143, |
| "grad_norm": 1.3359375, |
| "learning_rate": 5.929037408715812e-06, |
| "loss": 1.1201751708984375, |
| "step": 730, |
| "token_acc": 0.6626515495086923 |
| }, |
| { |
| "epoch": 0.65625, |
| "grad_norm": 1.328125, |
| "learning_rate": 5.794641738572925e-06, |
| "loss": 1.1207469940185546, |
| "step": 735, |
| "token_acc": 0.6625378393718664 |
| }, |
| { |
| "epoch": 0.6607142857142857, |
| "grad_norm": 1.2578125, |
| "learning_rate": 5.66116260882442e-06, |
| "loss": 1.0935090065002442, |
| "step": 740, |
| "token_acc": 0.6697370980003889 |
| }, |
| { |
| "epoch": 0.6651785714285714, |
| "grad_norm": 1.3046875, |
| "learning_rate": 5.528629110693111e-06, |
| "loss": 1.1128035545349122, |
| "step": 745, |
| "token_acc": 0.6645328465003976 |
| }, |
| { |
| "epoch": 0.6696428571428571, |
| "grad_norm": 1.265625, |
| "learning_rate": 5.397070129305343e-06, |
| "loss": 1.0865594863891601, |
| "step": 750, |
| "token_acc": 0.6718154777435683 |
| }, |
| { |
| "epoch": 0.6741071428571429, |
| "grad_norm": 1.2734375, |
| "learning_rate": 5.2665143373955476e-06, |
| "loss": 1.105082130432129, |
| "step": 755, |
| "token_acc": 0.6654881601202781 |
| }, |
| { |
| "epoch": 0.6785714285714286, |
| "grad_norm": 1.265625, |
| "learning_rate": 5.136990189057187e-06, |
| "loss": 1.0965201377868652, |
| "step": 760, |
| "token_acc": 0.6685612497216721 |
| }, |
| { |
| "epoch": 0.6830357142857143, |
| "grad_norm": 1.234375, |
| "learning_rate": 5.008525913541292e-06, |
| "loss": 1.0776897430419923, |
| "step": 765, |
| "token_acc": 0.673377759708695 |
| }, |
| { |
| "epoch": 0.6875, |
| "grad_norm": 1.3125, |
| "learning_rate": 4.881149509103993e-06, |
| "loss": 1.1034416198730468, |
| "step": 770, |
| "token_acc": 0.6676547557024715 |
| }, |
| { |
| "epoch": 0.6919642857142857, |
| "grad_norm": 1.3046875, |
| "learning_rate": 4.754888736904432e-06, |
| "loss": 1.111644172668457, |
| "step": 775, |
| "token_acc": 0.6635881969829432 |
| }, |
| { |
| "epoch": 0.6964285714285714, |
| "grad_norm": 1.28125, |
| "learning_rate": 4.629771114954341e-06, |
| "loss": 1.1045246124267578, |
| "step": 780, |
| "token_acc": 0.6661986169129779 |
| }, |
| { |
| "epoch": 0.7008928571428571, |
| "grad_norm": 1.203125, |
| "learning_rate": 4.505823912120586e-06, |
| "loss": 1.083775806427002, |
| "step": 785, |
| "token_acc": 0.6717203862042463 |
| }, |
| { |
| "epoch": 0.7053571428571429, |
| "grad_norm": 1.265625, |
| "learning_rate": 4.3830741421820376e-06, |
| "loss": 1.0916669845581055, |
| "step": 790, |
| "token_acc": 0.670933072789448 |
| }, |
| { |
| "epoch": 0.7098214285714286, |
| "grad_norm": 1.3359375, |
| "learning_rate": 4.261548557942047e-06, |
| "loss": 1.11307373046875, |
| "step": 795, |
| "token_acc": 0.6643882351532854 |
| }, |
| { |
| "epoch": 0.7142857142857143, |
| "grad_norm": 1.25, |
| "learning_rate": 4.1412736453977545e-06, |
| "loss": 1.0729114532470703, |
| "step": 800, |
| "token_acc": 0.6777025043600384 |
| }, |
| { |
| "epoch": 0.71875, |
| "grad_norm": 1.2421875, |
| "learning_rate": 4.0222756179675915e-06, |
| "loss": 1.0922969818115233, |
| "step": 805, |
| "token_acc": 0.669288262867004 |
| }, |
| { |
| "epoch": 0.7232142857142857, |
| "grad_norm": 1.2734375, |
| "learning_rate": 3.904580410778185e-06, |
| "loss": 1.1067237854003906, |
| "step": 810, |
| "token_acc": 0.6661032228983548 |
| }, |
| { |
| "epoch": 0.7276785714285714, |
| "grad_norm": 1.3046875, |
| "learning_rate": 3.7882136750118823e-06, |
| "loss": 1.0945161819458007, |
| "step": 815, |
| "token_acc": 0.6704976778178561 |
| }, |
| { |
| "epoch": 0.7321428571428571, |
| "grad_norm": 1.2578125, |
| "learning_rate": 3.6732007723161933e-06, |
| "loss": 1.0965933799743652, |
| "step": 820, |
| "token_acc": 0.6691131570294518 |
| }, |
| { |
| "epoch": 0.7366071428571429, |
| "grad_norm": 1.2734375, |
| "learning_rate": 3.5595667692763346e-06, |
| "loss": 1.0896465301513671, |
| "step": 825, |
| "token_acc": 0.6698150243632081 |
| }, |
| { |
| "epoch": 0.7410714285714286, |
| "grad_norm": 1.2421875, |
| "learning_rate": 3.447336431952052e-06, |
| "loss": 1.103147315979004, |
| "step": 830, |
| "token_acc": 0.6670983919710921 |
| }, |
| { |
| "epoch": 0.7455357142857143, |
| "grad_norm": 1.3359375, |
| "learning_rate": 3.3365342204799613e-06, |
| "loss": 1.0856735229492187, |
| "step": 835, |
| "token_acc": 0.6713515977465578 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 1.25, |
| "learning_rate": 3.2271842837425917e-06, |
| "loss": 1.1038305282592773, |
| "step": 840, |
| "token_acc": 0.6657289592038155 |
| }, |
| { |
| "epoch": 0.7544642857142857, |
| "grad_norm": 1.328125, |
| "learning_rate": 3.119310454105199e-06, |
| "loss": 1.0918630599975585, |
| "step": 845, |
| "token_acc": 0.6703148288973384 |
| }, |
| { |
| "epoch": 0.7589285714285714, |
| "grad_norm": 1.2421875, |
| "learning_rate": 3.0129362422216223e-06, |
| "loss": 1.0974313735961914, |
| "step": 850, |
| "token_acc": 0.6704314984982306 |
| }, |
| { |
| "epoch": 0.7633928571428571, |
| "grad_norm": 1.328125, |
| "learning_rate": 2.908084831910237e-06, |
| "loss": 1.1198549270629883, |
| "step": 855, |
| "token_acc": 0.6628890624347961 |
| }, |
| { |
| "epoch": 0.7678571428571429, |
| "grad_norm": 1.2734375, |
| "learning_rate": 2.8047790751011216e-06, |
| "loss": 1.0908279418945312, |
| "step": 860, |
| "token_acc": 0.6704230184504901 |
| }, |
| { |
| "epoch": 0.7723214285714286, |
| "grad_norm": 1.234375, |
| "learning_rate": 2.703041486855583e-06, |
| "loss": 1.108245849609375, |
| "step": 865, |
| "token_acc": 0.6653421633554084 |
| }, |
| { |
| "epoch": 0.7767857142857143, |
| "grad_norm": 1.25, |
| "learning_rate": 2.602894240459103e-06, |
| "loss": 1.116114044189453, |
| "step": 870, |
| "token_acc": 0.6625173300470429 |
| }, |
| { |
| "epoch": 0.78125, |
| "grad_norm": 1.1796875, |
| "learning_rate": 2.504359162588741e-06, |
| "loss": 1.08361759185791, |
| "step": 875, |
| "token_acc": 0.673106125478107 |
| }, |
| { |
| "epoch": 0.7857142857142857, |
| "grad_norm": 1.296875, |
| "learning_rate": 2.407457728556115e-06, |
| "loss": 1.102341079711914, |
| "step": 880, |
| "token_acc": 0.6663129222823587 |
| }, |
| { |
| "epoch": 0.7901785714285714, |
| "grad_norm": 1.2734375, |
| "learning_rate": 2.312211057626942e-06, |
| "loss": 1.100531005859375, |
| "step": 885, |
| "token_acc": 0.6663185254637597 |
| }, |
| { |
| "epoch": 0.7946428571428571, |
| "grad_norm": 1.3203125, |
| "learning_rate": 2.218639908418189e-06, |
| "loss": 1.1055935859680175, |
| "step": 890, |
| "token_acc": 0.666173639479739 |
| }, |
| { |
| "epoch": 0.7991071428571429, |
| "grad_norm": 1.2734375, |
| "learning_rate": 2.1267646743738034e-06, |
| "loss": 1.0964255332946777, |
| "step": 895, |
| "token_acc": 0.6691968672248048 |
| }, |
| { |
| "epoch": 0.8035714285714286, |
| "grad_norm": 1.2109375, |
| "learning_rate": 2.0366053793200567e-06, |
| "loss": 1.0952293395996093, |
| "step": 900, |
| "token_acc": 0.6708661325086683 |
| }, |
| { |
| "epoch": 0.8080357142857143, |
| "grad_norm": 1.3203125, |
| "learning_rate": 1.9481816731014412e-06, |
| "loss": 1.080392837524414, |
| "step": 905, |
| "token_acc": 0.6736406863758528 |
| }, |
| { |
| "epoch": 0.8125, |
| "grad_norm": 1.3046875, |
| "learning_rate": 1.861512827298051e-06, |
| "loss": 1.110099983215332, |
| "step": 910, |
| "token_acc": 0.6655230956311279 |
| }, |
| { |
| "epoch": 0.8169642857142857, |
| "grad_norm": 1.2265625, |
| "learning_rate": 1.7766177310254306e-06, |
| "loss": 1.090493392944336, |
| "step": 915, |
| "token_acc": 0.6710389858691432 |
| }, |
| { |
| "epoch": 0.8214285714285714, |
| "grad_norm": 1.359375, |
| "learning_rate": 1.693514886817772e-06, |
| "loss": 1.0946979522705078, |
| "step": 920, |
| "token_acc": 0.6691246847215617 |
| }, |
| { |
| "epoch": 0.8258928571428571, |
| "grad_norm": 1.265625, |
| "learning_rate": 1.6122224065953618e-06, |
| "loss": 1.0981364250183105, |
| "step": 925, |
| "token_acc": 0.6693018885434551 |
| }, |
| { |
| "epoch": 0.8303571428571429, |
| "grad_norm": 1.3203125, |
| "learning_rate": 1.5327580077171589e-06, |
| "loss": 1.084502601623535, |
| "step": 930, |
| "token_acc": 0.6716733571432733 |
| }, |
| { |
| "epoch": 0.8348214285714286, |
| "grad_norm": 1.2890625, |
| "learning_rate": 1.455139009119383e-06, |
| "loss": 1.1079372406005858, |
| "step": 935, |
| "token_acc": 0.6665884987659243 |
| }, |
| { |
| "epoch": 0.8392857142857143, |
| "grad_norm": 1.484375, |
| "learning_rate": 1.3793823275409068e-06, |
| "loss": 1.1032150268554688, |
| "step": 940, |
| "token_acc": 0.6673829265444353 |
| }, |
| { |
| "epoch": 0.84375, |
| "grad_norm": 1.234375, |
| "learning_rate": 1.305504473836331e-06, |
| "loss": 1.0951696395874024, |
| "step": 945, |
| "token_acc": 0.6693173328082196 |
| }, |
| { |
| "epoch": 0.8482142857142857, |
| "grad_norm": 1.2890625, |
| "learning_rate": 1.233521549377522e-06, |
| "loss": 1.091273307800293, |
| "step": 950, |
| "token_acc": 0.6727149994807473 |
| }, |
| { |
| "epoch": 0.8526785714285714, |
| "grad_norm": 1.2265625, |
| "learning_rate": 1.1634492425443634e-06, |
| "loss": 1.0925727844238282, |
| "step": 955, |
| "token_acc": 0.6704545454545454 |
| }, |
| { |
| "epoch": 0.8571428571428571, |
| "grad_norm": 1.3046875, |
| "learning_rate": 1.0953028253055541e-06, |
| "loss": 1.1154996871948242, |
| "step": 960, |
| "token_acc": 0.6647821768795434 |
| }, |
| { |
| "epoch": 0.8616071428571429, |
| "grad_norm": 1.296875, |
| "learning_rate": 1.0290971498901481e-06, |
| "loss": 1.105567741394043, |
| "step": 965, |
| "token_acc": 0.6661696526337861 |
| }, |
| { |
| "epoch": 0.8660714285714286, |
| "grad_norm": 1.28125, |
| "learning_rate": 9.648466455505578e-07, |
| "loss": 1.0953655242919922, |
| "step": 970, |
| "token_acc": 0.6667339188226916 |
| }, |
| { |
| "epoch": 0.8705357142857143, |
| "grad_norm": 1.296875, |
| "learning_rate": 9.025653154177705e-07, |
| "loss": 1.1006592750549316, |
| "step": 975, |
| "token_acc": 0.666848379986253 |
| }, |
| { |
| "epoch": 0.875, |
| "grad_norm": 1.2890625, |
| "learning_rate": 8.42266733449425e-07, |
| "loss": 1.103367233276367, |
| "step": 980, |
| "token_acc": 0.6671485771741006 |
| }, |
| { |
| "epoch": 0.8794642857142857, |
| "grad_norm": 1.328125, |
| "learning_rate": 7.839640414714156e-07, |
| "loss": 1.1081018447875977, |
| "step": 985, |
| "token_acc": 0.664527035156532 |
| }, |
| { |
| "epoch": 0.8839285714285714, |
| "grad_norm": 1.296875, |
| "learning_rate": 7.276699463136872e-07, |
| "loss": 1.091609001159668, |
| "step": 990, |
| "token_acc": 0.6708852005532503 |
| }, |
| { |
| "epoch": 0.8883928571428571, |
| "grad_norm": 1.2734375, |
| "learning_rate": 6.733967170408451e-07, |
| "loss": 1.1118325233459472, |
| "step": 995, |
| "token_acc": 0.6650144617932624 |
| }, |
| { |
| "epoch": 0.8928571428571429, |
| "grad_norm": 1.25, |
| "learning_rate": 6.211561822781476e-07, |
| "loss": 1.0831655502319335, |
| "step": 1000, |
| "token_acc": 0.6735403414496951 |
| }, |
| { |
| "epoch": 0.8973214285714286, |
| "grad_norm": 1.34375, |
| "learning_rate": 5.709597276335144e-07, |
| "loss": 1.0991512298583985, |
| "step": 1005, |
| "token_acc": 0.6691225126435695 |
| }, |
| { |
| "epoch": 0.9017857142857143, |
| "grad_norm": 1.3046875, |
| "learning_rate": 5.228182932160841e-07, |
| "loss": 1.108486270904541, |
| "step": 1010, |
| "token_acc": 0.6649077455770052 |
| }, |
| { |
| "epoch": 0.90625, |
| "grad_norm": 1.2578125, |
| "learning_rate": 4.7674237125185597e-07, |
| "loss": 1.1087259292602538, |
| "step": 1015, |
| "token_acc": 0.6655792360501174 |
| }, |
| { |
| "epoch": 0.9107142857142857, |
| "grad_norm": 1.2734375, |
| "learning_rate": 4.327420037969532e-07, |
| "loss": 1.1094024658203125, |
| "step": 1020, |
| "token_acc": 0.663856754611703 |
| }, |
| { |
| "epoch": 0.9151785714285714, |
| "grad_norm": 1.2109375, |
| "learning_rate": 3.908267805490051e-07, |
| "loss": 1.0980701446533203, |
| "step": 1025, |
| "token_acc": 0.6679018819553691 |
| }, |
| { |
| "epoch": 0.9196428571428571, |
| "grad_norm": 1.265625, |
| "learning_rate": 3.510058367571045e-07, |
| "loss": 1.098175048828125, |
| "step": 1030, |
| "token_acc": 0.668898687688601 |
| }, |
| { |
| "epoch": 0.9241071428571429, |
| "grad_norm": 1.2890625, |
| "learning_rate": 3.132878512308213e-07, |
| "loss": 1.1017606735229493, |
| "step": 1035, |
| "token_acc": 0.6671746861165542 |
| }, |
| { |
| "epoch": 0.9285714285714286, |
| "grad_norm": 1.234375, |
| "learning_rate": 2.776810444486944e-07, |
| "loss": 1.1003715515136718, |
| "step": 1040, |
| "token_acc": 0.6674644336916524 |
| }, |
| { |
| "epoch": 0.9330357142857143, |
| "grad_norm": 1.2265625, |
| "learning_rate": 2.441931767666084e-07, |
| "loss": 1.0855772972106934, |
| "step": 1045, |
| "token_acc": 0.6727498238524203 |
| }, |
| { |
| "epoch": 0.9375, |
| "grad_norm": 1.28125, |
| "learning_rate": 2.1283154672645522e-07, |
| "loss": 1.0910385131835938, |
| "step": 1050, |
| "token_acc": 0.6683551673944687 |
| }, |
| { |
| "epoch": 0.9419642857142857, |
| "grad_norm": 1.25, |
| "learning_rate": 1.8360298946545452e-07, |
| "loss": 1.0924718856811524, |
| "step": 1055, |
| "token_acc": 0.6707896340201002 |
| }, |
| { |
| "epoch": 0.9464285714285714, |
| "grad_norm": 1.2734375, |
| "learning_rate": 1.5651387522645721e-07, |
| "loss": 1.0851358413696288, |
| "step": 1060, |
| "token_acc": 0.6709870034475813 |
| }, |
| { |
| "epoch": 0.9508928571428571, |
| "grad_norm": 1.234375, |
| "learning_rate": 1.315701079695775e-07, |
| "loss": 1.0930654525756835, |
| "step": 1065, |
| "token_acc": 0.669103371531395 |
| }, |
| { |
| "epoch": 0.9553571428571429, |
| "grad_norm": 1.2421875, |
| "learning_rate": 1.0877712408545294e-07, |
| "loss": 1.1010807037353516, |
| "step": 1070, |
| "token_acc": 0.6683365733045945 |
| }, |
| { |
| "epoch": 0.9598214285714286, |
| "grad_norm": 1.2578125, |
| "learning_rate": 8.813989121040478e-08, |
| "loss": 1.1053053855895996, |
| "step": 1075, |
| "token_acc": 0.6653947493353589 |
| }, |
| { |
| "epoch": 0.9642857142857143, |
| "grad_norm": 1.2890625, |
| "learning_rate": 6.966290714375934e-08, |
| "loss": 1.0881473541259765, |
| "step": 1080, |
| "token_acc": 0.671083660214032 |
| }, |
| { |
| "epoch": 0.96875, |
| "grad_norm": 1.265625, |
| "learning_rate": 5.3350198867574424e-08, |
| "loss": 1.0896425247192383, |
| "step": 1085, |
| "token_acc": 0.6703079702533506 |
| }, |
| { |
| "epoch": 0.9732142857142857, |
| "grad_norm": 1.2578125, |
| "learning_rate": 3.9205321668972506e-08, |
| "loss": 1.1037522315979005, |
| "step": 1090, |
| "token_acc": 0.6670471076628307 |
| }, |
| { |
| "epoch": 0.9776785714285714, |
| "grad_norm": 1.25, |
| "learning_rate": 2.723135836528501e-08, |
| "loss": 1.108121109008789, |
| "step": 1095, |
| "token_acc": 0.6640236439084097 |
| }, |
| { |
| "epoch": 0.9821428571428571, |
| "grad_norm": 1.296875, |
| "learning_rate": 1.7430918632157513e-08, |
| "loss": 1.0776978492736817, |
| "step": 1100, |
| "token_acc": 0.6742684264534163 |
| }, |
| { |
| "epoch": 0.9866071428571429, |
| "grad_norm": 1.3125, |
| "learning_rate": 9.80613843479361e-09, |
| "loss": 1.1027990341186524, |
| "step": 1105, |
| "token_acc": 0.6674602942466875 |
| }, |
| { |
| "epoch": 0.9910714285714286, |
| "grad_norm": 1.3203125, |
| "learning_rate": 4.358679562416202e-09, |
| "loss": 1.0937559127807617, |
| "step": 1110, |
| "token_acc": 0.670395055464588 |
| }, |
| { |
| "epoch": 0.9955357142857143, |
| "grad_norm": 1.2578125, |
| "learning_rate": 1.0897292660971836e-09, |
| "loss": 1.0890558242797852, |
| "step": 1115, |
| "token_acc": 0.6709881013358767 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 1.328125, |
| "learning_rate": 0.0, |
| "loss": 1.0961315155029296, |
| "step": 1120, |
| "token_acc": 0.6685871908005899 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 1120, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.8781764718816133e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|