captioner-sft-3 / trainer_state.json
Evan-Lin's picture
Upload folder using huggingface_hub
bc63efd verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 1000.0,
"global_step": 1120,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0008928571428571428,
"grad_norm": 7.46875,
"learning_rate": 3.5714285714285716e-07,
"loss": 1.9798583984375,
"step": 1,
"token_acc": 0.5198793098029355
},
{
"epoch": 0.004464285714285714,
"grad_norm": 6.84375,
"learning_rate": 1.7857142857142859e-06,
"loss": 1.942692756652832,
"step": 5,
"token_acc": 0.5277546839276701
},
{
"epoch": 0.008928571428571428,
"grad_norm": 6.0625,
"learning_rate": 3.5714285714285718e-06,
"loss": 1.9170162200927734,
"step": 10,
"token_acc": 0.5350005891363262
},
{
"epoch": 0.013392857142857142,
"grad_norm": 4.46875,
"learning_rate": 5.357142857142857e-06,
"loss": 1.8599681854248047,
"step": 15,
"token_acc": 0.5440987793825112
},
{
"epoch": 0.017857142857142856,
"grad_norm": 3.375,
"learning_rate": 7.1428571428571436e-06,
"loss": 1.773095703125,
"step": 20,
"token_acc": 0.558397212543554
},
{
"epoch": 0.022321428571428572,
"grad_norm": 2.59375,
"learning_rate": 8.92857142857143e-06,
"loss": 1.7239681243896485,
"step": 25,
"token_acc": 0.564640616036101
},
{
"epoch": 0.026785714285714284,
"grad_norm": 2.203125,
"learning_rate": 1.0714285714285714e-05,
"loss": 1.662449264526367,
"step": 30,
"token_acc": 0.5744314346237109
},
{
"epoch": 0.03125,
"grad_norm": 1.8125,
"learning_rate": 1.25e-05,
"loss": 1.565970230102539,
"step": 35,
"token_acc": 0.5905128053468978
},
{
"epoch": 0.03571428571428571,
"grad_norm": 1.5390625,
"learning_rate": 1.4285714285714287e-05,
"loss": 1.4979233741760254,
"step": 40,
"token_acc": 0.6035646975846081
},
{
"epoch": 0.04017857142857143,
"grad_norm": 1.46875,
"learning_rate": 1.6071428571428572e-05,
"loss": 1.4658918380737305,
"step": 45,
"token_acc": 0.6079301236643054
},
{
"epoch": 0.044642857142857144,
"grad_norm": 1.4296875,
"learning_rate": 1.785714285714286e-05,
"loss": 1.422969436645508,
"step": 50,
"token_acc": 0.6154094170613891
},
{
"epoch": 0.049107142857142856,
"grad_norm": 1.4609375,
"learning_rate": 1.9642857142857145e-05,
"loss": 1.3724126815795898,
"step": 55,
"token_acc": 0.6238321824406662
},
{
"epoch": 0.05357142857142857,
"grad_norm": 1.4609375,
"learning_rate": 1.9999302568709548e-05,
"loss": 1.3787870407104492,
"step": 60,
"token_acc": 0.6205635902788131
},
{
"epoch": 0.05803571428571429,
"grad_norm": 1.4296875,
"learning_rate": 1.999646942081983e-05,
"loss": 1.3338838577270509,
"step": 65,
"token_acc": 0.628516364022137
},
{
"epoch": 0.0625,
"grad_norm": 1.390625,
"learning_rate": 1.999145758387301e-05,
"loss": 1.3212929725646974,
"step": 70,
"token_acc": 0.6295137500294056
},
{
"epoch": 0.06696428571428571,
"grad_norm": 1.4140625,
"learning_rate": 1.998426815017817e-05,
"loss": 1.290837574005127,
"step": 75,
"token_acc": 0.6369310469506665
},
{
"epoch": 0.07142857142857142,
"grad_norm": 1.3828125,
"learning_rate": 1.997490268664256e-05,
"loss": 1.2963342666625977,
"step": 80,
"token_acc": 0.6334583410737507
},
{
"epoch": 0.07589285714285714,
"grad_norm": 1.4453125,
"learning_rate": 1.996336323443013e-05,
"loss": 1.2906134605407715,
"step": 85,
"token_acc": 0.6352960586401749
},
{
"epoch": 0.08035714285714286,
"grad_norm": 1.390625,
"learning_rate": 1.9949652308516635e-05,
"loss": 1.2730415344238282,
"step": 90,
"token_acc": 0.6381611754746083
},
{
"epoch": 0.08482142857142858,
"grad_norm": 1.4140625,
"learning_rate": 1.9933772897141525e-05,
"loss": 1.2450992584228515,
"step": 95,
"token_acc": 0.6466158412384333
},
{
"epoch": 0.08928571428571429,
"grad_norm": 1.4140625,
"learning_rate": 1.991572846115666e-05,
"loss": 1.2471370697021484,
"step": 100,
"token_acc": 0.6422763263951449
},
{
"epoch": 0.09375,
"grad_norm": 1.3984375,
"learning_rate": 1.9895522933272028e-05,
"loss": 1.2441038131713866,
"step": 105,
"token_acc": 0.6425760561463307
},
{
"epoch": 0.09821428571428571,
"grad_norm": 1.4140625,
"learning_rate": 1.9873160717198655e-05,
"loss": 1.230722427368164,
"step": 110,
"token_acc": 0.6461212036192747
},
{
"epoch": 0.10267857142857142,
"grad_norm": 1.4375,
"learning_rate": 1.9848646686688798e-05,
"loss": 1.2274154663085937,
"step": 115,
"token_acc": 0.645449105054132
},
{
"epoch": 0.10714285714285714,
"grad_norm": 1.421875,
"learning_rate": 1.9821986184473757e-05,
"loss": 1.2445575714111328,
"step": 120,
"token_acc": 0.640380009025793
},
{
"epoch": 0.11160714285714286,
"grad_norm": 1.375,
"learning_rate": 1.9793185021099426e-05,
"loss": 1.2152713775634765,
"step": 125,
"token_acc": 0.650086875941156
},
{
"epoch": 0.11607142857142858,
"grad_norm": 1.421875,
"learning_rate": 1.9762249473659936e-05,
"loss": 1.2223292350769044,
"step": 130,
"token_acc": 0.646190244076421
},
{
"epoch": 0.12053571428571429,
"grad_norm": 1.34375,
"learning_rate": 1.9729186284429567e-05,
"loss": 1.20882568359375,
"step": 135,
"token_acc": 0.6505111612683495
},
{
"epoch": 0.125,
"grad_norm": 1.4765625,
"learning_rate": 1.9694002659393306e-05,
"loss": 1.198903465270996,
"step": 140,
"token_acc": 0.6506264468770722
},
{
"epoch": 0.12946428571428573,
"grad_norm": 1.453125,
"learning_rate": 1.965670626667633e-05,
"loss": 1.18528413772583,
"step": 145,
"token_acc": 0.6555728824688115
},
{
"epoch": 0.13392857142857142,
"grad_norm": 1.46875,
"learning_rate": 1.9617305234872773e-05,
"loss": 1.2123258590698243,
"step": 150,
"token_acc": 0.648010957945968
},
{
"epoch": 0.13839285714285715,
"grad_norm": 1.3984375,
"learning_rate": 1.9575808151274133e-05,
"loss": 1.20123291015625,
"step": 155,
"token_acc": 0.6520161821001254
},
{
"epoch": 0.14285714285714285,
"grad_norm": 1.453125,
"learning_rate": 1.9532224059997693e-05,
"loss": 1.2176162719726562,
"step": 160,
"token_acc": 0.6464380895343614
},
{
"epoch": 0.14732142857142858,
"grad_norm": 1.375,
"learning_rate": 1.948656246001542e-05,
"loss": 1.1905139923095702,
"step": 165,
"token_acc": 0.6515007571439264
},
{
"epoch": 0.15178571428571427,
"grad_norm": 1.34375,
"learning_rate": 1.9438833303083677e-05,
"loss": 1.2004039764404297,
"step": 170,
"token_acc": 0.650028138976926
},
{
"epoch": 0.15625,
"grad_norm": 1.3515625,
"learning_rate": 1.9389046991574298e-05,
"loss": 1.1866092681884766,
"step": 175,
"token_acc": 0.6536794480531047
},
{
"epoch": 0.16071428571428573,
"grad_norm": 1.359375,
"learning_rate": 1.9337214376207417e-05,
"loss": 1.1794092178344726,
"step": 180,
"token_acc": 0.6547250738292786
},
{
"epoch": 0.16517857142857142,
"grad_norm": 1.3515625,
"learning_rate": 1.9283346753686625e-05,
"loss": 1.1931296348571778,
"step": 185,
"token_acc": 0.6512469949273786
},
{
"epoch": 0.16964285714285715,
"grad_norm": 1.4375,
"learning_rate": 1.922745586423687e-05,
"loss": 1.175852394104004,
"step": 190,
"token_acc": 0.6550968319583254
},
{
"epoch": 0.17410714285714285,
"grad_norm": 1.359375,
"learning_rate": 1.9169553889045732e-05,
"loss": 1.1729495048522949,
"step": 195,
"token_acc": 0.6564944949906185
},
{
"epoch": 0.17857142857142858,
"grad_norm": 1.296875,
"learning_rate": 1.9109653447608607e-05,
"loss": 1.1875883102416993,
"step": 200,
"token_acc": 0.6519358095416979
},
{
"epoch": 0.18303571428571427,
"grad_norm": 1.453125,
"learning_rate": 1.9047767594978308e-05,
"loss": 1.1628761291503906,
"step": 205,
"token_acc": 0.6568504306122326
},
{
"epoch": 0.1875,
"grad_norm": 1.4375,
"learning_rate": 1.898390981891979e-05,
"loss": 1.1821978569030762,
"step": 210,
"token_acc": 0.6514381801603373
},
{
"epoch": 0.19196428571428573,
"grad_norm": 1.3671875,
"learning_rate": 1.891809403697054e-05,
"loss": 1.1805411338806153,
"step": 215,
"token_acc": 0.6520954938127481
},
{
"epoch": 0.19642857142857142,
"grad_norm": 1.3828125,
"learning_rate": 1.885033459340731e-05,
"loss": 1.1788909912109375,
"step": 220,
"token_acc": 0.6525619317127482
},
{
"epoch": 0.20089285714285715,
"grad_norm": 1.3359375,
"learning_rate": 1.8780646256119843e-05,
"loss": 1.1617810249328613,
"step": 225,
"token_acc": 0.6574105324410824
},
{
"epoch": 0.20535714285714285,
"grad_norm": 1.4453125,
"learning_rate": 1.8709044213392265e-05,
"loss": 1.1675668716430665,
"step": 230,
"token_acc": 0.6567929066970355
},
{
"epoch": 0.20982142857142858,
"grad_norm": 1.3671875,
"learning_rate": 1.8635544070592876e-05,
"loss": 1.1429882049560547,
"step": 235,
"token_acc": 0.6626304648850445
},
{
"epoch": 0.21428571428571427,
"grad_norm": 1.46875,
"learning_rate": 1.8560161846773002e-05,
"loss": 1.1523635864257813,
"step": 240,
"token_acc": 0.6588007349650185
},
{
"epoch": 0.21875,
"grad_norm": 1.3828125,
"learning_rate": 1.8482913971175737e-05,
"loss": 1.1638723373413087,
"step": 245,
"token_acc": 0.6575461339567173
},
{
"epoch": 0.22321428571428573,
"grad_norm": 1.3828125,
"learning_rate": 1.8403817279655237e-05,
"loss": 1.1588207244873048,
"step": 250,
"token_acc": 0.6566742318540298
},
{
"epoch": 0.22767857142857142,
"grad_norm": 1.375,
"learning_rate": 1.8322889011007424e-05,
"loss": 1.1697870254516602,
"step": 255,
"token_acc": 0.6530151356474628
},
{
"epoch": 0.23214285714285715,
"grad_norm": 1.375,
"learning_rate": 1.8240146803212854e-05,
"loss": 1.163081169128418,
"step": 260,
"token_acc": 0.654347747769385
},
{
"epoch": 0.23660714285714285,
"grad_norm": 1.3515625,
"learning_rate": 1.8155608689592604e-05,
"loss": 1.1553804397583007,
"step": 265,
"token_acc": 0.656771626981819
},
{
"epoch": 0.24107142857142858,
"grad_norm": 1.3984375,
"learning_rate": 1.8069293094877974e-05,
"loss": 1.1408929824829102,
"step": 270,
"token_acc": 0.6625453583525501
},
{
"epoch": 0.24553571428571427,
"grad_norm": 1.34375,
"learning_rate": 1.7981218831194904e-05,
"loss": 1.1588726043701172,
"step": 275,
"token_acc": 0.6563144046706936
},
{
"epoch": 0.25,
"grad_norm": 1.3671875,
"learning_rate": 1.789140509396394e-05,
"loss": 1.1532201766967773,
"step": 280,
"token_acc": 0.6581244540600577
},
{
"epoch": 0.2544642857142857,
"grad_norm": 1.3515625,
"learning_rate": 1.7799871457716665e-05,
"loss": 1.1504724502563477,
"step": 285,
"token_acc": 0.6587934607950444
},
{
"epoch": 0.25892857142857145,
"grad_norm": 1.359375,
"learning_rate": 1.770663787182954e-05,
"loss": 1.1456276893615722,
"step": 290,
"token_acc": 0.6618624162066538
},
{
"epoch": 0.26339285714285715,
"grad_norm": 1.3125,
"learning_rate": 1.7611724656175982e-05,
"loss": 1.1442519187927247,
"step": 295,
"token_acc": 0.6591544798140778
},
{
"epoch": 0.26785714285714285,
"grad_norm": 1.375,
"learning_rate": 1.7515152496697765e-05,
"loss": 1.152108383178711,
"step": 300,
"token_acc": 0.6586486613213298
},
{
"epoch": 0.27232142857142855,
"grad_norm": 1.3515625,
"learning_rate": 1.7416942440896577e-05,
"loss": 1.1452009201049804,
"step": 305,
"token_acc": 0.6591358039294544
},
{
"epoch": 0.2767857142857143,
"grad_norm": 1.34375,
"learning_rate": 1.7317115893246833e-05,
"loss": 1.1422765731811524,
"step": 310,
"token_acc": 0.6589653054958551
},
{
"epoch": 0.28125,
"grad_norm": 1.3515625,
"learning_rate": 1.7215694610530624e-05,
"loss": 1.1257204055786132,
"step": 315,
"token_acc": 0.6644322342183476
},
{
"epoch": 0.2857142857142857,
"grad_norm": 1.3984375,
"learning_rate": 1.7112700697095955e-05,
"loss": 1.150202178955078,
"step": 320,
"token_acc": 0.657143724020753
},
{
"epoch": 0.29017857142857145,
"grad_norm": 1.265625,
"learning_rate": 1.7008156600039157e-05,
"loss": 1.1264986038208007,
"step": 325,
"token_acc": 0.6647021643690532
},
{
"epoch": 0.29464285714285715,
"grad_norm": 1.390625,
"learning_rate": 1.690208510431267e-05,
"loss": 1.1261561393737793,
"step": 330,
"token_acc": 0.6645312870725624
},
{
"epoch": 0.29910714285714285,
"grad_norm": 1.421875,
"learning_rate": 1.6794509327759132e-05,
"loss": 1.1390050888061523,
"step": 335,
"token_acc": 0.6606452523992492
},
{
"epoch": 0.30357142857142855,
"grad_norm": 1.34375,
"learning_rate": 1.6685452716072946e-05,
"loss": 1.128352451324463,
"step": 340,
"token_acc": 0.6637220739566702
},
{
"epoch": 0.3080357142857143,
"grad_norm": 1.359375,
"learning_rate": 1.6574939037690394e-05,
"loss": 1.1525999069213868,
"step": 345,
"token_acc": 0.6566719096261906
},
{
"epoch": 0.3125,
"grad_norm": 1.3671875,
"learning_rate": 1.646299237860941e-05,
"loss": 1.1295086860656738,
"step": 350,
"token_acc": 0.6623984060840936
},
{
"epoch": 0.3169642857142857,
"grad_norm": 1.375,
"learning_rate": 1.634963713714012e-05,
"loss": 1.1322021484375,
"step": 355,
"token_acc": 0.6617089233387465
},
{
"epoch": 0.32142857142857145,
"grad_norm": 1.390625,
"learning_rate": 1.6234898018587336e-05,
"loss": 1.1252225875854491,
"step": 360,
"token_acc": 0.6641228446029669
},
{
"epoch": 0.32589285714285715,
"grad_norm": 1.328125,
"learning_rate": 1.6118800029866157e-05,
"loss": 1.1185049057006835,
"step": 365,
"token_acc": 0.6656541637014795
},
{
"epoch": 0.33035714285714285,
"grad_norm": 1.3515625,
"learning_rate": 1.600136847405179e-05,
"loss": 1.1188979148864746,
"step": 370,
"token_acc": 0.6649322180559561
},
{
"epoch": 0.33482142857142855,
"grad_norm": 1.7421875,
"learning_rate": 1.5882628944864862e-05,
"loss": 1.1338905334472655,
"step": 375,
"token_acc": 0.6627974764645106
},
{
"epoch": 0.3392857142857143,
"grad_norm": 1.328125,
"learning_rate": 1.5762607321093368e-05,
"loss": 1.1297473907470703,
"step": 380,
"token_acc": 0.662811429370499
},
{
"epoch": 0.34375,
"grad_norm": 1.3359375,
"learning_rate": 1.5641329760952514e-05,
"loss": 1.1086080551147461,
"step": 385,
"token_acc": 0.6686927987727868
},
{
"epoch": 0.3482142857142857,
"grad_norm": 1.3203125,
"learning_rate": 1.5518822696383612e-05,
"loss": 1.1249174118041991,
"step": 390,
"token_acc": 0.6625904370106592
},
{
"epoch": 0.35267857142857145,
"grad_norm": 1.3203125,
"learning_rate": 1.539511282729338e-05,
"loss": 1.1013822555541992,
"step": 395,
"token_acc": 0.670699511008058
},
{
"epoch": 0.35714285714285715,
"grad_norm": 1.3515625,
"learning_rate": 1.527022711573479e-05,
"loss": 1.1248649597167968,
"step": 400,
"token_acc": 0.6626294342018012
},
{
"epoch": 0.36160714285714285,
"grad_norm": 1.28125,
"learning_rate": 1.51441927800308e-05,
"loss": 1.119422721862793,
"step": 405,
"token_acc": 0.6646745395834887
},
{
"epoch": 0.36607142857142855,
"grad_norm": 1.359375,
"learning_rate": 1.5017037288842238e-05,
"loss": 1.1286213874816895,
"step": 410,
"token_acc": 0.6624454043387759
},
{
"epoch": 0.3705357142857143,
"grad_norm": 1.375,
"learning_rate": 1.4888788355181128e-05,
"loss": 1.1240810394287108,
"step": 415,
"token_acc": 0.661629880136473
},
{
"epoch": 0.375,
"grad_norm": 1.296875,
"learning_rate": 1.4759473930370738e-05,
"loss": 1.122232151031494,
"step": 420,
"token_acc": 0.6647913929040736
},
{
"epoch": 0.3794642857142857,
"grad_norm": 1.3359375,
"learning_rate": 1.4629122197953716e-05,
"loss": 1.127860927581787,
"step": 425,
"token_acc": 0.6610134748138546
},
{
"epoch": 0.38392857142857145,
"grad_norm": 1.328125,
"learning_rate": 1.4497761567549602e-05,
"loss": 1.1123634338378907,
"step": 430,
"token_acc": 0.6672016743978223
},
{
"epoch": 0.38839285714285715,
"grad_norm": 1.3046875,
"learning_rate": 1.4365420668663075e-05,
"loss": 1.1055331230163574,
"step": 435,
"token_acc": 0.667418617766589
},
{
"epoch": 0.39285714285714285,
"grad_norm": 1.4140625,
"learning_rate": 1.4232128344444251e-05,
"loss": 1.1390070915222168,
"step": 440,
"token_acc": 0.6599930821407843
},
{
"epoch": 0.39732142857142855,
"grad_norm": 1.3359375,
"learning_rate": 1.4097913645402463e-05,
"loss": 1.119845199584961,
"step": 445,
"token_acc": 0.6643133551388953
},
{
"epoch": 0.4017857142857143,
"grad_norm": 1.3359375,
"learning_rate": 1.396280582307481e-05,
"loss": 1.1190576553344727,
"step": 450,
"token_acc": 0.6644705200637404
},
{
"epoch": 0.40625,
"grad_norm": 1.265625,
"learning_rate": 1.3826834323650899e-05,
"loss": 1.114608383178711,
"step": 455,
"token_acc": 0.6656076250992852
},
{
"epoch": 0.4107142857142857,
"grad_norm": 1.375,
"learning_rate": 1.369002878155519e-05,
"loss": 1.122308349609375,
"step": 460,
"token_acc": 0.6635312122906672
},
{
"epoch": 0.41517857142857145,
"grad_norm": 1.34375,
"learning_rate": 1.3552419012988284e-05,
"loss": 1.112066650390625,
"step": 465,
"token_acc": 0.6657017205282056
},
{
"epoch": 0.41964285714285715,
"grad_norm": 1.296875,
"learning_rate": 1.3414035009428598e-05,
"loss": 1.1216192245483398,
"step": 470,
"token_acc": 0.663814267923857
},
{
"epoch": 0.42410714285714285,
"grad_norm": 1.34375,
"learning_rate": 1.3274906931095863e-05,
"loss": 1.1180400848388672,
"step": 475,
"token_acc": 0.6634265241395513
},
{
"epoch": 0.42857142857142855,
"grad_norm": 1.2890625,
"learning_rate": 1.3135065100377816e-05,
"loss": 1.1156521797180177,
"step": 480,
"token_acc": 0.664764806569064
},
{
"epoch": 0.4330357142857143,
"grad_norm": 1.3125,
"learning_rate": 1.2994539995221564e-05,
"loss": 1.115281867980957,
"step": 485,
"token_acc": 0.6655095086932463
},
{
"epoch": 0.4375,
"grad_norm": 1.3515625,
"learning_rate": 1.2853362242491054e-05,
"loss": 1.1114022254943847,
"step": 490,
"token_acc": 0.6649909487491468
},
{
"epoch": 0.4419642857142857,
"grad_norm": 1.2421875,
"learning_rate": 1.2711562611292063e-05,
"loss": 1.1045937538146973,
"step": 495,
"token_acc": 0.6681451286030272
},
{
"epoch": 0.44642857142857145,
"grad_norm": 1.3828125,
"learning_rate": 1.2569172006266192e-05,
"loss": 1.1057794570922852,
"step": 500,
"token_acc": 0.6668334494856537
},
{
"epoch": 0.45089285714285715,
"grad_norm": 1.3359375,
"learning_rate": 1.2426221460855352e-05,
"loss": 1.1071063041687013,
"step": 505,
"token_acc": 0.6664125673607487
},
{
"epoch": 0.45535714285714285,
"grad_norm": 1.265625,
"learning_rate": 1.2282742130538121e-05,
"loss": 1.0995834350585938,
"step": 510,
"token_acc": 0.6681870274068059
},
{
"epoch": 0.45982142857142855,
"grad_norm": 1.34375,
"learning_rate": 1.2138765286039573e-05,
"loss": 1.1039738655090332,
"step": 515,
"token_acc": 0.668202407915127
},
{
"epoch": 0.4642857142857143,
"grad_norm": 1.2890625,
"learning_rate": 1.1994322306515926e-05,
"loss": 1.11993465423584,
"step": 520,
"token_acc": 0.6619411576211444
},
{
"epoch": 0.46875,
"grad_norm": 1.3046875,
"learning_rate": 1.1849444672715587e-05,
"loss": 1.0845392227172852,
"step": 525,
"token_acc": 0.6731254973876336
},
{
"epoch": 0.4732142857142857,
"grad_norm": 1.2109375,
"learning_rate": 1.1704163960118069e-05,
"loss": 1.0899698257446289,
"step": 530,
"token_acc": 0.6714283226626576
},
{
"epoch": 0.47767857142857145,
"grad_norm": 1.3828125,
"learning_rate": 1.155851183205224e-05,
"loss": 1.1180584907531739,
"step": 535,
"token_acc": 0.6640847703163699
},
{
"epoch": 0.48214285714285715,
"grad_norm": 1.265625,
"learning_rate": 1.141252003279542e-05,
"loss": 1.0958803176879883,
"step": 540,
"token_acc": 0.6683001406764094
},
{
"epoch": 0.48660714285714285,
"grad_norm": 1.25,
"learning_rate": 1.1266220380654862e-05,
"loss": 1.0876192092895507,
"step": 545,
"token_acc": 0.674294498100293
},
{
"epoch": 0.49107142857142855,
"grad_norm": 1.3203125,
"learning_rate": 1.1119644761033079e-05,
"loss": 1.1089275360107422,
"step": 550,
"token_acc": 0.6656800260519644
},
{
"epoch": 0.4955357142857143,
"grad_norm": 1.3203125,
"learning_rate": 1.097282511947855e-05,
"loss": 1.105890655517578,
"step": 555,
"token_acc": 0.6669049856720307
},
{
"epoch": 0.5,
"grad_norm": 1.234375,
"learning_rate": 1.0825793454723325e-05,
"loss": 1.0942912101745605,
"step": 560,
"token_acc": 0.6705247046279258
},
{
"epoch": 0.5044642857142857,
"grad_norm": 1.3046875,
"learning_rate": 1.0678581811709025e-05,
"loss": 1.0961302757263183,
"step": 565,
"token_acc": 0.6700240310483472
},
{
"epoch": 0.5089285714285714,
"grad_norm": 1.3359375,
"learning_rate": 1.0531222274602795e-05,
"loss": 1.0944637298583983,
"step": 570,
"token_acc": 0.6694562628384824
},
{
"epoch": 0.5133928571428571,
"grad_norm": 1.2578125,
"learning_rate": 1.0383746959804672e-05,
"loss": 1.0805794715881347,
"step": 575,
"token_acc": 0.6751718465425063
},
{
"epoch": 0.5178571428571429,
"grad_norm": 1.2578125,
"learning_rate": 1.023618800894798e-05,
"loss": 1.0854421615600587,
"step": 580,
"token_acc": 0.6732954215748854
},
{
"epoch": 0.5223214285714286,
"grad_norm": 1.2890625,
"learning_rate": 1.0088577581894154e-05,
"loss": 1.0917674064636231,
"step": 585,
"token_acc": 0.670189745515675
},
{
"epoch": 0.5267857142857143,
"grad_norm": 1.3203125,
"learning_rate": 9.94094784972367e-06,
"loss": 1.0841856002807617,
"step": 590,
"token_acc": 0.6734028220945898
},
{
"epoch": 0.53125,
"grad_norm": 1.296875,
"learning_rate": 9.79333098772446e-06,
"loss": 1.1098053932189942,
"step": 595,
"token_acc": 0.6651632518802563
},
{
"epoch": 0.5357142857142857,
"grad_norm": 1.2421875,
"learning_rate": 9.645759168379463e-06,
"loss": 1.1012333869934081,
"step": 600,
"token_acc": 0.667337838941999
},
{
"epoch": 0.5401785714285714,
"grad_norm": 1.3125,
"learning_rate": 9.498264554354761e-06,
"loss": 1.0885583877563476,
"step": 605,
"token_acc": 0.6714689082367089
},
{
"epoch": 0.5446428571428571,
"grad_norm": 1.3046875,
"learning_rate": 9.350879291489848e-06,
"loss": 1.104905128479004,
"step": 610,
"token_acc": 0.6663167322052396
},
{
"epoch": 0.5491071428571429,
"grad_norm": 1.2734375,
"learning_rate": 9.203635501791595e-06,
"loss": 1.1066022872924806,
"step": 615,
"token_acc": 0.6652294646322893
},
{
"epoch": 0.5535714285714286,
"grad_norm": 1.2890625,
"learning_rate": 9.056565276433378e-06,
"loss": 1.1055733680725097,
"step": 620,
"token_acc": 0.6665237252904985
},
{
"epoch": 0.5580357142857143,
"grad_norm": 1.3515625,
"learning_rate": 8.909700668760945e-06,
"loss": 1.1153239250183105,
"step": 625,
"token_acc": 0.6633878551716786
},
{
"epoch": 0.5625,
"grad_norm": 1.265625,
"learning_rate": 8.763073687306523e-06,
"loss": 1.1114237785339356,
"step": 630,
"token_acc": 0.665496981051093
},
{
"epoch": 0.5669642857142857,
"grad_norm": 1.3203125,
"learning_rate": 8.616716288812694e-06,
"loss": 1.0952648162841796,
"step": 635,
"token_acc": 0.6696672683549216
},
{
"epoch": 0.5714285714285714,
"grad_norm": 1.28125,
"learning_rate": 8.47066037126754e-06,
"loss": 1.1008512496948242,
"step": 640,
"token_acc": 0.6676704190666269
},
{
"epoch": 0.5758928571428571,
"grad_norm": 1.3515625,
"learning_rate": 8.324937766952638e-06,
"loss": 1.1116990089416503,
"step": 645,
"token_acc": 0.6651052619143255
},
{
"epoch": 0.5803571428571429,
"grad_norm": 1.3046875,
"learning_rate": 8.17958023550531e-06,
"loss": 1.1096595764160155,
"step": 650,
"token_acc": 0.6650905800358365
},
{
"epoch": 0.5848214285714286,
"grad_norm": 1.2890625,
"learning_rate": 8.03461945699677e-06,
"loss": 1.0997350692749024,
"step": 655,
"token_acc": 0.66805490553854
},
{
"epoch": 0.5892857142857143,
"grad_norm": 1.2265625,
"learning_rate": 7.89008702502758e-06,
"loss": 1.1013197898864746,
"step": 660,
"token_acc": 0.6695434588695944
},
{
"epoch": 0.59375,
"grad_norm": 1.3359375,
"learning_rate": 7.746014439841941e-06,
"loss": 1.1132354736328125,
"step": 665,
"token_acc": 0.6643929538342533
},
{
"epoch": 0.5982142857142857,
"grad_norm": 1.265625,
"learning_rate": 7.602433101462351e-06,
"loss": 1.1085187911987304,
"step": 670,
"token_acc": 0.6662411498410222
},
{
"epoch": 0.6026785714285714,
"grad_norm": 1.328125,
"learning_rate": 7.459374302846114e-06,
"loss": 1.0951228141784668,
"step": 675,
"token_acc": 0.6677065047207857
},
{
"epoch": 0.6071428571428571,
"grad_norm": 1.375,
"learning_rate": 7.316869223065156e-06,
"loss": 1.114619827270508,
"step": 680,
"token_acc": 0.6641058671975641
},
{
"epoch": 0.6116071428571429,
"grad_norm": 1.296875,
"learning_rate": 7.174948920510675e-06,
"loss": 1.0971644401550293,
"step": 685,
"token_acc": 0.6688319741107969
},
{
"epoch": 0.6160714285714286,
"grad_norm": 1.296875,
"learning_rate": 7.033644326124104e-06,
"loss": 1.104668140411377,
"step": 690,
"token_acc": 0.6667567008413406
},
{
"epoch": 0.6205357142857143,
"grad_norm": 1.3359375,
"learning_rate": 6.892986236655827e-06,
"loss": 1.114396095275879,
"step": 695,
"token_acc": 0.6627671848794471
},
{
"epoch": 0.625,
"grad_norm": 1.3046875,
"learning_rate": 6.7530053079531664e-06,
"loss": 1.098177146911621,
"step": 700,
"token_acc": 0.668855782018009
},
{
"epoch": 0.6294642857142857,
"grad_norm": 1.265625,
"learning_rate": 6.613732048279064e-06,
"loss": 1.0986656188964843,
"step": 705,
"token_acc": 0.6675273764392436
},
{
"epoch": 0.6339285714285714,
"grad_norm": 1.2890625,
"learning_rate": 6.475196811662929e-06,
"loss": 1.0969505310058594,
"step": 710,
"token_acc": 0.6685544895216992
},
{
"epoch": 0.6383928571428571,
"grad_norm": 1.265625,
"learning_rate": 6.337429791285107e-06,
"loss": 1.0815807342529298,
"step": 715,
"token_acc": 0.6731986752641865
},
{
"epoch": 0.6428571428571429,
"grad_norm": 1.265625,
"learning_rate": 6.200461012896401e-06,
"loss": 1.0996244430541993,
"step": 720,
"token_acc": 0.66839132010241
},
{
"epoch": 0.6473214285714286,
"grad_norm": 1.3125,
"learning_rate": 6.064320328274079e-06,
"loss": 1.0926881790161134,
"step": 725,
"token_acc": 0.6687555529230587
},
{
"epoch": 0.6517857142857143,
"grad_norm": 1.3359375,
"learning_rate": 5.929037408715812e-06,
"loss": 1.1201751708984375,
"step": 730,
"token_acc": 0.6626515495086923
},
{
"epoch": 0.65625,
"grad_norm": 1.328125,
"learning_rate": 5.794641738572925e-06,
"loss": 1.1207469940185546,
"step": 735,
"token_acc": 0.6625378393718664
},
{
"epoch": 0.6607142857142857,
"grad_norm": 1.2578125,
"learning_rate": 5.66116260882442e-06,
"loss": 1.0935090065002442,
"step": 740,
"token_acc": 0.6697370980003889
},
{
"epoch": 0.6651785714285714,
"grad_norm": 1.3046875,
"learning_rate": 5.528629110693111e-06,
"loss": 1.1128035545349122,
"step": 745,
"token_acc": 0.6645328465003976
},
{
"epoch": 0.6696428571428571,
"grad_norm": 1.265625,
"learning_rate": 5.397070129305343e-06,
"loss": 1.0865594863891601,
"step": 750,
"token_acc": 0.6718154777435683
},
{
"epoch": 0.6741071428571429,
"grad_norm": 1.2734375,
"learning_rate": 5.2665143373955476e-06,
"loss": 1.105082130432129,
"step": 755,
"token_acc": 0.6654881601202781
},
{
"epoch": 0.6785714285714286,
"grad_norm": 1.265625,
"learning_rate": 5.136990189057187e-06,
"loss": 1.0965201377868652,
"step": 760,
"token_acc": 0.6685612497216721
},
{
"epoch": 0.6830357142857143,
"grad_norm": 1.234375,
"learning_rate": 5.008525913541292e-06,
"loss": 1.0776897430419923,
"step": 765,
"token_acc": 0.673377759708695
},
{
"epoch": 0.6875,
"grad_norm": 1.3125,
"learning_rate": 4.881149509103993e-06,
"loss": 1.1034416198730468,
"step": 770,
"token_acc": 0.6676547557024715
},
{
"epoch": 0.6919642857142857,
"grad_norm": 1.3046875,
"learning_rate": 4.754888736904432e-06,
"loss": 1.111644172668457,
"step": 775,
"token_acc": 0.6635881969829432
},
{
"epoch": 0.6964285714285714,
"grad_norm": 1.28125,
"learning_rate": 4.629771114954341e-06,
"loss": 1.1045246124267578,
"step": 780,
"token_acc": 0.6661986169129779
},
{
"epoch": 0.7008928571428571,
"grad_norm": 1.203125,
"learning_rate": 4.505823912120586e-06,
"loss": 1.083775806427002,
"step": 785,
"token_acc": 0.6717203862042463
},
{
"epoch": 0.7053571428571429,
"grad_norm": 1.265625,
"learning_rate": 4.3830741421820376e-06,
"loss": 1.0916669845581055,
"step": 790,
"token_acc": 0.670933072789448
},
{
"epoch": 0.7098214285714286,
"grad_norm": 1.3359375,
"learning_rate": 4.261548557942047e-06,
"loss": 1.11307373046875,
"step": 795,
"token_acc": 0.6643882351532854
},
{
"epoch": 0.7142857142857143,
"grad_norm": 1.25,
"learning_rate": 4.1412736453977545e-06,
"loss": 1.0729114532470703,
"step": 800,
"token_acc": 0.6777025043600384
},
{
"epoch": 0.71875,
"grad_norm": 1.2421875,
"learning_rate": 4.0222756179675915e-06,
"loss": 1.0922969818115233,
"step": 805,
"token_acc": 0.669288262867004
},
{
"epoch": 0.7232142857142857,
"grad_norm": 1.2734375,
"learning_rate": 3.904580410778185e-06,
"loss": 1.1067237854003906,
"step": 810,
"token_acc": 0.6661032228983548
},
{
"epoch": 0.7276785714285714,
"grad_norm": 1.3046875,
"learning_rate": 3.7882136750118823e-06,
"loss": 1.0945161819458007,
"step": 815,
"token_acc": 0.6704976778178561
},
{
"epoch": 0.7321428571428571,
"grad_norm": 1.2578125,
"learning_rate": 3.6732007723161933e-06,
"loss": 1.0965933799743652,
"step": 820,
"token_acc": 0.6691131570294518
},
{
"epoch": 0.7366071428571429,
"grad_norm": 1.2734375,
"learning_rate": 3.5595667692763346e-06,
"loss": 1.0896465301513671,
"step": 825,
"token_acc": 0.6698150243632081
},
{
"epoch": 0.7410714285714286,
"grad_norm": 1.2421875,
"learning_rate": 3.447336431952052e-06,
"loss": 1.103147315979004,
"step": 830,
"token_acc": 0.6670983919710921
},
{
"epoch": 0.7455357142857143,
"grad_norm": 1.3359375,
"learning_rate": 3.3365342204799613e-06,
"loss": 1.0856735229492187,
"step": 835,
"token_acc": 0.6713515977465578
},
{
"epoch": 0.75,
"grad_norm": 1.25,
"learning_rate": 3.2271842837425917e-06,
"loss": 1.1038305282592773,
"step": 840,
"token_acc": 0.6657289592038155
},
{
"epoch": 0.7544642857142857,
"grad_norm": 1.328125,
"learning_rate": 3.119310454105199e-06,
"loss": 1.0918630599975585,
"step": 845,
"token_acc": 0.6703148288973384
},
{
"epoch": 0.7589285714285714,
"grad_norm": 1.2421875,
"learning_rate": 3.0129362422216223e-06,
"loss": 1.0974313735961914,
"step": 850,
"token_acc": 0.6704314984982306
},
{
"epoch": 0.7633928571428571,
"grad_norm": 1.328125,
"learning_rate": 2.908084831910237e-06,
"loss": 1.1198549270629883,
"step": 855,
"token_acc": 0.6628890624347961
},
{
"epoch": 0.7678571428571429,
"grad_norm": 1.2734375,
"learning_rate": 2.8047790751011216e-06,
"loss": 1.0908279418945312,
"step": 860,
"token_acc": 0.6704230184504901
},
{
"epoch": 0.7723214285714286,
"grad_norm": 1.234375,
"learning_rate": 2.703041486855583e-06,
"loss": 1.108245849609375,
"step": 865,
"token_acc": 0.6653421633554084
},
{
"epoch": 0.7767857142857143,
"grad_norm": 1.25,
"learning_rate": 2.602894240459103e-06,
"loss": 1.116114044189453,
"step": 870,
"token_acc": 0.6625173300470429
},
{
"epoch": 0.78125,
"grad_norm": 1.1796875,
"learning_rate": 2.504359162588741e-06,
"loss": 1.08361759185791,
"step": 875,
"token_acc": 0.673106125478107
},
{
"epoch": 0.7857142857142857,
"grad_norm": 1.296875,
"learning_rate": 2.407457728556115e-06,
"loss": 1.102341079711914,
"step": 880,
"token_acc": 0.6663129222823587
},
{
"epoch": 0.7901785714285714,
"grad_norm": 1.2734375,
"learning_rate": 2.312211057626942e-06,
"loss": 1.100531005859375,
"step": 885,
"token_acc": 0.6663185254637597
},
{
"epoch": 0.7946428571428571,
"grad_norm": 1.3203125,
"learning_rate": 2.218639908418189e-06,
"loss": 1.1055935859680175,
"step": 890,
"token_acc": 0.666173639479739
},
{
"epoch": 0.7991071428571429,
"grad_norm": 1.2734375,
"learning_rate": 2.1267646743738034e-06,
"loss": 1.0964255332946777,
"step": 895,
"token_acc": 0.6691968672248048
},
{
"epoch": 0.8035714285714286,
"grad_norm": 1.2109375,
"learning_rate": 2.0366053793200567e-06,
"loss": 1.0952293395996093,
"step": 900,
"token_acc": 0.6708661325086683
},
{
"epoch": 0.8080357142857143,
"grad_norm": 1.3203125,
"learning_rate": 1.9481816731014412e-06,
"loss": 1.080392837524414,
"step": 905,
"token_acc": 0.6736406863758528
},
{
"epoch": 0.8125,
"grad_norm": 1.3046875,
"learning_rate": 1.861512827298051e-06,
"loss": 1.110099983215332,
"step": 910,
"token_acc": 0.6655230956311279
},
{
"epoch": 0.8169642857142857,
"grad_norm": 1.2265625,
"learning_rate": 1.7766177310254306e-06,
"loss": 1.090493392944336,
"step": 915,
"token_acc": 0.6710389858691432
},
{
"epoch": 0.8214285714285714,
"grad_norm": 1.359375,
"learning_rate": 1.693514886817772e-06,
"loss": 1.0946979522705078,
"step": 920,
"token_acc": 0.6691246847215617
},
{
"epoch": 0.8258928571428571,
"grad_norm": 1.265625,
"learning_rate": 1.6122224065953618e-06,
"loss": 1.0981364250183105,
"step": 925,
"token_acc": 0.6693018885434551
},
{
"epoch": 0.8303571428571429,
"grad_norm": 1.3203125,
"learning_rate": 1.5327580077171589e-06,
"loss": 1.084502601623535,
"step": 930,
"token_acc": 0.6716733571432733
},
{
"epoch": 0.8348214285714286,
"grad_norm": 1.2890625,
"learning_rate": 1.455139009119383e-06,
"loss": 1.1079372406005858,
"step": 935,
"token_acc": 0.6665884987659243
},
{
"epoch": 0.8392857142857143,
"grad_norm": 1.484375,
"learning_rate": 1.3793823275409068e-06,
"loss": 1.1032150268554688,
"step": 940,
"token_acc": 0.6673829265444353
},
{
"epoch": 0.84375,
"grad_norm": 1.234375,
"learning_rate": 1.305504473836331e-06,
"loss": 1.0951696395874024,
"step": 945,
"token_acc": 0.6693173328082196
},
{
"epoch": 0.8482142857142857,
"grad_norm": 1.2890625,
"learning_rate": 1.233521549377522e-06,
"loss": 1.091273307800293,
"step": 950,
"token_acc": 0.6727149994807473
},
{
"epoch": 0.8526785714285714,
"grad_norm": 1.2265625,
"learning_rate": 1.1634492425443634e-06,
"loss": 1.0925727844238282,
"step": 955,
"token_acc": 0.6704545454545454
},
{
"epoch": 0.8571428571428571,
"grad_norm": 1.3046875,
"learning_rate": 1.0953028253055541e-06,
"loss": 1.1154996871948242,
"step": 960,
"token_acc": 0.6647821768795434
},
{
"epoch": 0.8616071428571429,
"grad_norm": 1.296875,
"learning_rate": 1.0290971498901481e-06,
"loss": 1.105567741394043,
"step": 965,
"token_acc": 0.6661696526337861
},
{
"epoch": 0.8660714285714286,
"grad_norm": 1.28125,
"learning_rate": 9.648466455505578e-07,
"loss": 1.0953655242919922,
"step": 970,
"token_acc": 0.6667339188226916
},
{
"epoch": 0.8705357142857143,
"grad_norm": 1.296875,
"learning_rate": 9.025653154177705e-07,
"loss": 1.1006592750549316,
"step": 975,
"token_acc": 0.666848379986253
},
{
"epoch": 0.875,
"grad_norm": 1.2890625,
"learning_rate": 8.42266733449425e-07,
"loss": 1.103367233276367,
"step": 980,
"token_acc": 0.6671485771741006
},
{
"epoch": 0.8794642857142857,
"grad_norm": 1.328125,
"learning_rate": 7.839640414714156e-07,
"loss": 1.1081018447875977,
"step": 985,
"token_acc": 0.664527035156532
},
{
"epoch": 0.8839285714285714,
"grad_norm": 1.296875,
"learning_rate": 7.276699463136872e-07,
"loss": 1.091609001159668,
"step": 990,
"token_acc": 0.6708852005532503
},
{
"epoch": 0.8883928571428571,
"grad_norm": 1.2734375,
"learning_rate": 6.733967170408451e-07,
"loss": 1.1118325233459472,
"step": 995,
"token_acc": 0.6650144617932624
},
{
"epoch": 0.8928571428571429,
"grad_norm": 1.25,
"learning_rate": 6.211561822781476e-07,
"loss": 1.0831655502319335,
"step": 1000,
"token_acc": 0.6735403414496951
},
{
"epoch": 0.8973214285714286,
"grad_norm": 1.34375,
"learning_rate": 5.709597276335144e-07,
"loss": 1.0991512298583985,
"step": 1005,
"token_acc": 0.6691225126435695
},
{
"epoch": 0.9017857142857143,
"grad_norm": 1.3046875,
"learning_rate": 5.228182932160841e-07,
"loss": 1.108486270904541,
"step": 1010,
"token_acc": 0.6649077455770052
},
{
"epoch": 0.90625,
"grad_norm": 1.2578125,
"learning_rate": 4.7674237125185597e-07,
"loss": 1.1087259292602538,
"step": 1015,
"token_acc": 0.6655792360501174
},
{
"epoch": 0.9107142857142857,
"grad_norm": 1.2734375,
"learning_rate": 4.327420037969532e-07,
"loss": 1.1094024658203125,
"step": 1020,
"token_acc": 0.663856754611703
},
{
"epoch": 0.9151785714285714,
"grad_norm": 1.2109375,
"learning_rate": 3.908267805490051e-07,
"loss": 1.0980701446533203,
"step": 1025,
"token_acc": 0.6679018819553691
},
{
"epoch": 0.9196428571428571,
"grad_norm": 1.265625,
"learning_rate": 3.510058367571045e-07,
"loss": 1.098175048828125,
"step": 1030,
"token_acc": 0.668898687688601
},
{
"epoch": 0.9241071428571429,
"grad_norm": 1.2890625,
"learning_rate": 3.132878512308213e-07,
"loss": 1.1017606735229493,
"step": 1035,
"token_acc": 0.6671746861165542
},
{
"epoch": 0.9285714285714286,
"grad_norm": 1.234375,
"learning_rate": 2.776810444486944e-07,
"loss": 1.1003715515136718,
"step": 1040,
"token_acc": 0.6674644336916524
},
{
"epoch": 0.9330357142857143,
"grad_norm": 1.2265625,
"learning_rate": 2.441931767666084e-07,
"loss": 1.0855772972106934,
"step": 1045,
"token_acc": 0.6727498238524203
},
{
"epoch": 0.9375,
"grad_norm": 1.28125,
"learning_rate": 2.1283154672645522e-07,
"loss": 1.0910385131835938,
"step": 1050,
"token_acc": 0.6683551673944687
},
{
"epoch": 0.9419642857142857,
"grad_norm": 1.25,
"learning_rate": 1.8360298946545452e-07,
"loss": 1.0924718856811524,
"step": 1055,
"token_acc": 0.6707896340201002
},
{
"epoch": 0.9464285714285714,
"grad_norm": 1.2734375,
"learning_rate": 1.5651387522645721e-07,
"loss": 1.0851358413696288,
"step": 1060,
"token_acc": 0.6709870034475813
},
{
"epoch": 0.9508928571428571,
"grad_norm": 1.234375,
"learning_rate": 1.315701079695775e-07,
"loss": 1.0930654525756835,
"step": 1065,
"token_acc": 0.669103371531395
},
{
"epoch": 0.9553571428571429,
"grad_norm": 1.2421875,
"learning_rate": 1.0877712408545294e-07,
"loss": 1.1010807037353516,
"step": 1070,
"token_acc": 0.6683365733045945
},
{
"epoch": 0.9598214285714286,
"grad_norm": 1.2578125,
"learning_rate": 8.813989121040478e-08,
"loss": 1.1053053855895996,
"step": 1075,
"token_acc": 0.6653947493353589
},
{
"epoch": 0.9642857142857143,
"grad_norm": 1.2890625,
"learning_rate": 6.966290714375934e-08,
"loss": 1.0881473541259765,
"step": 1080,
"token_acc": 0.671083660214032
},
{
"epoch": 0.96875,
"grad_norm": 1.265625,
"learning_rate": 5.3350198867574424e-08,
"loss": 1.0896425247192383,
"step": 1085,
"token_acc": 0.6703079702533506
},
{
"epoch": 0.9732142857142857,
"grad_norm": 1.2578125,
"learning_rate": 3.9205321668972506e-08,
"loss": 1.1037522315979005,
"step": 1090,
"token_acc": 0.6670471076628307
},
{
"epoch": 0.9776785714285714,
"grad_norm": 1.25,
"learning_rate": 2.723135836528501e-08,
"loss": 1.108121109008789,
"step": 1095,
"token_acc": 0.6640236439084097
},
{
"epoch": 0.9821428571428571,
"grad_norm": 1.296875,
"learning_rate": 1.7430918632157513e-08,
"loss": 1.0776978492736817,
"step": 1100,
"token_acc": 0.6742684264534163
},
{
"epoch": 0.9866071428571429,
"grad_norm": 1.3125,
"learning_rate": 9.80613843479361e-09,
"loss": 1.1027990341186524,
"step": 1105,
"token_acc": 0.6674602942466875
},
{
"epoch": 0.9910714285714286,
"grad_norm": 1.3203125,
"learning_rate": 4.358679562416202e-09,
"loss": 1.0937559127807617,
"step": 1110,
"token_acc": 0.670395055464588
},
{
"epoch": 0.9955357142857143,
"grad_norm": 1.2578125,
"learning_rate": 1.0897292660971836e-09,
"loss": 1.0890558242797852,
"step": 1115,
"token_acc": 0.6709881013358767
},
{
"epoch": 1.0,
"grad_norm": 1.328125,
"learning_rate": 0.0,
"loss": 1.0961315155029296,
"step": 1120,
"token_acc": 0.6685871908005899
}
],
"logging_steps": 5,
"max_steps": 1120,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.8781764718816133e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}